def do_test_mysql():
    """Return every column listed in ``INFORMATION_SCHEMA.COLUMNS``, by table.

    The connection comes from ``boilerplate.get_mysql_connection()``; it is
    left open here (its lifetime is managed outside this function), but the
    cursor created for the query is always closed.

    Returns:
        ``{"schema": {table_name: [column_name, ...]}}`` with all names
        as ``str``.
    """

    def as_text(value):
        # The driver may hand metadata names back as bytes/bytearray (the
        # table name was always decoded here); normalise both names so the
        # result contains only str and stays JSON-serialisable.
        if isinstance(value, (bytes, bytearray)):
            return value.decode('utf-8')
        return value

    conn = boilerplate.get_mysql_connection()
    cur = conn.cursor()
    try:
        cur.execute("SELECT table_name, column_name FROM INFORMATION_SCHEMA.COLUMNS")
        schema = {}
        for table_name, column_name in cur.fetchall():
            schema.setdefault(as_text(table_name), []).append(as_text(column_name))
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    return {"schema": schema}
Example #2
0
def test_mysql_endpoint():
    """Return the tables of ``hse-api-database`` as a JSON response.

    Returns:
        ``jsonify({"schema": {table_name: []}})`` - every table name maps to
        an empty list (columns are not looked up here).
    """
    conn = boilerplate.get_mysql_connection()
    cur = conn.cursor()
    try:
        cur.execute("show tables in `hse-api-database`")
        schema = {}
        for row in cur.fetchall():
            table_name = row[0]
            # Decode only when the driver returned raw bytes, so a driver
            # that already yields str does not raise AttributeError.
            if isinstance(table_name, (bytes, bytearray)):
                table_name = table_name.decode('utf-8')
            schema.setdefault(table_name, [])
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    return jsonify({"schema": schema})
Example #3
0
def process_data(file):
    """Load a delimited text file into a new table of ``hse-api-database``.

    The table name is ``file`` without its last four characters (a
    three-letter extension such as ``.csv`` is assumed).  The file content is
    fetched with ``boilerplate.get_file``, decoded as UTF-8 and split into
    rows, skipping blank lines:

    * for the table ``main`` every line is split on ``;``;
    * for any other table every line is split on ``,`` and the first field
      is dropped.

    Every row must yield exactly four values, inserted as
    ``(word, lemma, morphs, categories)``.

    Args:
        file: Name of the file to load.

    Returns:
        ``(name, text)`` on success, where ``text`` is the decoded file
        content.  ``None`` when the table already exists or when loading
        fails; a failure is reported with a traceback on stderr and the
        pending inserts are rolled back.
    """
    import traceback

    conn = boilerplate.get_mysql_connection()
    cur = conn.cursor()
    print(file)
    name = file[:-4]
    print(name)
    # Identifiers cannot be bound as query parameters, so quote the table
    # name (doubling embedded backticks) instead of splicing it in raw.
    table = "`hse-api-database`.`{}`".format(name.replace("`", "``"))
    try:
        # The placeholder must be unquoted and the parameters a sequence;
        # the driver does the quoting of the value itself.
        cur.execute(
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema = 'hse-api-database' AND table_name = %s",
            (name,),
        )
        resp = cur.fetchone()
        print(resp)
        if resp is not None:
            # CREATE TABLE would fail on an existing table and the call
            # would end up returning None anyway; stop before doing work.
            return None
        try:
            text = boilerplate.get_file(file).decode('utf-8')
            # A trailing newline would otherwise produce an empty row whose
            # INSERT fails and aborts the whole load.
            lines = [line for line in text.split('\n') if line.strip()]
            if name == 'main':
                rows = [tuple(line.split(';')) for line in lines]
            else:
                rows = [tuple(line.split(',')[1:]) for line in lines]
            print(rows[:5])
            # NOTE: CREATE TABLE commits implicitly in MySQL, so a failure
            # further down leaves an empty table behind.
            cur.execute(
                "CREATE TABLE {} (word varchar(300), lemma varchar(300), "
                "morphs varchar(300), categories varchar(100))".format(table)
            )
            insert_sql = (
                "INSERT INTO {}(word,lemma,morphs,categories) "
                "VALUES(%s, %s, %s, %s)".format(table)
            )
            for row in rows:
                try:
                    cur.execute(insert_sql, row)
                except Exception:
                    # Show the offending row before giving up on the load.
                    print(row)
                    raise
            conn.commit()
            return name, text
        except Exception:
            # Best-effort contract: callers get None on failure, but the
            # error is no longer swallowed silently.
            traceback.print_exc()
            try:
                conn.rollback()
            except Exception:
                traceback.print_exc()
            return None
    finally:
        cur.close()
Example #4
0
def query_endpoint():
    """Search the ``word`` column of one or more tables for a string.

    Expects a POST request whose JSON body contains:

    * ``base`` - ``'all'`` to search only the ``main`` table; any other
      value to search the tables given under ``tables``;
    * ``tables`` - a table name or a list of table names (read only when
      ``base`` is not ``'all'``);
    * ``string`` - the text to look for;
    * ``regexp`` - optional; when the key is present ``string`` is used as
      a MySQL ``REGEXP`` pattern, otherwise as a substring match
      (``LIKE '%string%'``).

    Returns:
        The matching words joined by newlines (the same text is written to
        ``results.csv`` in the working directory), or a JSON error object
        when the request is not a POST or ``safe_check`` rejects the tables.
    """
    if request.method != 'POST':
        return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})

    payload = request.json
    print(payload)
    if payload['base'] == 'all':
        tables = ['main']
    elif isinstance(payload['tables'], str):
        # Check whether a single table or several were given; a single
        # name is normalised to a one-element list.
        tables = [payload['tables']]
    else:
        tables = payload['tables']
    # Table names are formatted into the SQL text below, so they must pass
    # safe_check before any query is built.
    if not safe_check(tables):
        return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})

    use_regexp = 'regexp' in payload
    string = payload['string']
    if not use_regexp:
        # Build the LIKE pattern once; wrapping inside the loop would add
        # another pair of '%' for every additional table.
        string = '%{}%'.format(string)

    res = []
    # Open the connection only after validation so rejected requests do not
    # leave a connection/cursor behind.
    conn = boilerplate.get_mysql_connection()
    cur = conn.cursor()
    try:
        for table in tables:
            print(table)
            if use_regexp:
                sql = "SELECT word FROM `hse-api-database`.{} WHERE word REGEXP %(string)s;".format(
                    table)
            else:
                sql = "SELECT word FROM `hse-api-database`.{} WHERE word LIKE %(string)s".format(
                    table)
                print(sql)
            cur.execute(sql, {'string': string})
            res += [x[0] for x in cur.fetchall()]
            print(res)
    finally:
        cur.close()

    body = '\n'.join(res)
    # Explicit encoding: the words may be non-ASCII and the platform default
    # encoding is not guaranteed to be able to represent them.
    with open('results.csv', 'w', encoding='utf-8') as f:
        f.write(body)
    return body
Example #5
0
from boilerplate import get_mysql_connection
import defusedxml
from defusedxml.common import EntitiesForbidden
from xlrd import open_workbook
from tqdm import tqdm

# Patch the standard-library XML modules with defusedxml's hardened
# replacements; done at import time, before anything below parses input.
defusedxml.defuse_stdlib()
# Module-level connection and cursor, opened when this module is imported
# and used by the functions below.  The cursor is created with
# dictionary=True, so each fetched row is a dict rather than a tuple.
CON = get_mysql_connection()
CUR = CON.cursor(dictionary=True)
# Russian subject-area names -> English identifiers.
# NOTE(review): judging by the constant's name, the values are the names of
# the per-domain collocation tables - confirm against the code that uses it.
COLLOCATION_TABLES_MAPPING = {
    'Лингвистика': 'linguistics',  # Linguistics
    'Социология': 'sociology',  # Sociology
    'Политология': 'politology',  # Political science
    'Юриспруденция': 'law',  # Jurisprudence / law
    'Психология и педагогика': 'psychology',  # Psychology and pedagogy
    'Экономика': 'economics'  # Economics
}


def write_to_db_metas(data_to_process):
    """
    Upload metadata
    """
    CUR.execute("SHOW TABLES")
    result = CUR.fetchall()
    tables = [
        list(dictionary.values())[0].decode("utf-8") for dictionary in result
    ]
    if 'meta_cat' not in tables:
        create_table_metas = "CREATE TABLE IF NOT EXISTS meta_cat (id_text INT AUTO_INCREMENT, domain VARCHAR(255), year INT, author VARCHAR(255), journal VARCHAR(255), source VARCHAR(255), article_name VARCHAR(255), PRIMARY KEY (id_text))"
        CUR.execute(create_table_metas)