Exemple #1
0
def exact_search(word, docs, flag, expand, page, per_page):
    """Return one page of sentences that contain the exact token ``word``.

    Args:
        word: token text compared with ``annotator_token.token``.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, restrict every query to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence``.
        page: 1-based page number.
        per_page: number of sentences per page.

    Returns:
        Tuple ``(jq, sent_list, word, docs_len, sent_num)``: the ``jquery``
        template instantiated for each sentence id, the ``ShowSentence``
        objects of the requested page, the searched word, the number of
        distinct documents and the number of distinct sentences that match.
    """
    if flag and not docs:
        # ``doc_id IN ()`` is a SQL syntax error; an empty subcorpus has no hits.
        return [], [], word, 0, 0
    db = Database()
    # NOTE(review): ``word`` is concatenated into the SQL text unescaped; this
    # should become a bound parameter once Database.execute is confirmed to
    # accept parameters.
    where = ' FROM `annotator_token` WHERE token="' + word + '" '
    doc_filter = 'AND doc_id IN (' + ','.join(docs) + ')' if flag else ''
    end = ';' if flag else ''
    docs_len = int(db.execute(
        'SELECT COUNT(DISTINCT doc_id)' + where + doc_filter + end)[0][0])
    sent_num = int(db.execute(
        'SELECT COUNT(DISTINCT sent_id)' + where + doc_filter + end)[0][0])
    page_req = ('SELECT DISTINCT sent_id' + where + doc_filter
                + ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))
    sent_ids = [str(row[0]) for row in db.execute(page_req)]
    if sent_ids:
        tokens = db.execute(
            'SELECT sent_id, num FROM `annotator_token` WHERE token="' + word
            + '" AND sent_id IN (' + ', '.join(sent_ids) + ')')
    else:
        # A page without sentences would otherwise produce ``sent_id IN ()``.
        tokens = []
    positions = defaultdict(list)
    for sent_id, num in tokens:
        positions[sent_id].append(num)
    sent_list = [ShowSentence(sent_id, positions[sent_id], expand)
                 for sent_id in positions]
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
Exemple #2
0
def search(parameter, value):
    """Look up registered users and their friends by a single attribute.

    Args:
        parameter: one of ``'name'``, ``'surname'``, ``'city'`` or ``'age'``.
        value: the value to look for; for ``'age'`` it is converted with
            ``int`` before the returned rows are filtered.

    Returns:
        A list of unique ``[name, surname, city, age]``-style lists (columns
        1-4 and 7-10 of each joined row) that contain ``value``, in the order
        they were first seen.  An unknown ``parameter`` yields ``[]``.
    """
    # Whitelisted column pairs: only these names are ever formatted into SQL.
    columns = {
        'name': ('user_name', 'f_name'),
        'surname': ('user_surname', 'f_surname'),
        'city': ('user_city', 'f_city'),
        'age': ('user_age', 'f_age'),
    }
    if parameter not in columns:
        # The original iterated over the integer 0 here and crashed.
        return []
    user_column, friend_column = columns[parameter]
    db = Database()
    query = ('SELECT * FROM user_info JOIN friendship '
             'ON ({0} = %s OR {1} = %s)').format(user_column, friend_column)
    people = []
    for row in db.execute(query, (value, value)):
        people.append((row[1], row[2], row[3], row[4]))
        people.append((row[7], row[8], row[9], row[10]))
    # Convert only when there is something to filter, so a non-numeric age
    # with no matching rows still returns [] instead of raising.
    if parameter == 'age' and people:
        target = int(value)
    else:
        target = value
    unique = []
    seen = set()
    for person in people:
        if target in person and person not in seen:
            seen.add(person)
            unique.append(list(person))
    return unique
Exemple #3
0
def download_file(request, doc_id, doc_type):
    """Build a downloadable export of one document.

    Args:
        request: the incoming request; not read by this function.
        doc_id: id of the document; must be convertible with ``int``.
        doc_type: ``'ann'`` exports the annotations as tab-separated rows,
            ``'text'`` exports the sentence texts encoded as cp1251, and any
            other value exports the token table as tab-separated rows.

    Returns:
        An ``HttpResponse`` carrying the export and a ``Content-Disposition``
        header naming the file.

    Raises:
        ValueError: if ``doc_id`` is not an integer.
    """
    # ``doc_id`` ends up in SQL text and in a response header, so accept
    # nothing but an integer before using it anywhere.
    doc_key = int(doc_id)
    db = Database()
    if doc_type == 'ann':
        req = ("SELECT `username`, `data`, `tag`, `start`, `end` FROM `annotator_annotation` "
               "LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id "
               "WHERE `document_id` in (SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%d)" % doc_key)
        header = u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n'
        lines = []
        for row in db.execute(req):
            # The ``data`` column holds JSON with the quoted error and its correction.
            data = json.loads(row[1])
            lines.append('\t'.join([
                str(row[0]), data['quote'], data['corrs'], row[2],
                str(row[3]),
                str(row[4])
            ]) + '\r\n')
        text = header + u''.join(lines)
        response = HttpResponse(text, content_type='text/csv; charset=utf-8')
        response[
            'Content-Disposition'] = 'attachment; filename="annotation_text_%d.csv"' % doc_key
        return response
    elif doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%d" % doc_key
        text = ' '.join(
            h.unescape(i[0]).encode('cp1251') for i in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        response['Content-Disposition'] = 'filename="text_%d.txt"' % doc_key
        return response
    else:
        req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%d" % doc_key
        rows = u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n' + u'\r\n'.join(
            u'\t'.join([str(row[2]), row[0],
                        str(row[1]), '', '', '']) for row in db.execute(req))
        response = HttpResponse(rows, content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="tokens_text_%d.txt"' % doc_key
        return response
Exemple #4
0
def exact_search(word, docs, flag, expand, page, per_page):
    """Return one page of sentences containing the first word of ``word``.

    Args:
        word: search string; only its first whitespace-separated word is used.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, restrict every query to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence``.
        page: 1-based page number.
        per_page: number of sentences per page.

    Returns:
        Tuple ``(jq, sent_list, word, docs_len, sent_num)``: the ``jquery``
        template instantiated for each sentence id, the ``ShowSentence``
        objects of the page ordered by sentence id, the word that was searched,
        and the numbers of distinct matching documents and sentences.  A blank
        ``word`` or an empty restricted subcorpus yields empty results.
    """
    terms = word.split()
    if not terms or (flag and not docs):
        # Blank query (``split()[0]`` would raise) or ``doc_id IN ()``
        # (invalid SQL): nothing can match, so skip the database entirely.
        return [], [], word, 0, 0
    word = terms[0]
    db = Database()
    # NOTE(review): ``word`` is concatenated into the SQL text unescaped; this
    # should become a bound parameter once Database.execute is confirmed to
    # accept parameters.
    where = ' FROM `annotator_token` WHERE token="' + word + '" '
    doc_filter = 'AND doc_id IN (' + ','.join(docs) + ')' if flag else ''
    end = ';' if flag else ''
    docs_len = int(db.execute(
        'SELECT COUNT(DISTINCT doc_id)' + where + doc_filter + end)[0][0])
    sent_num = int(db.execute(
        'SELECT COUNT(DISTINCT sent_id)' + where + doc_filter + end)[0][0])
    page_req = ('SELECT DISTINCT sent_id' + where + doc_filter
                + ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))
    sent_ids = [str(row[0]) for row in db.execute(page_req)]
    if sent_ids:
        tokens = db.execute(
            'SELECT sent_id, num FROM `annotator_token` WHERE token="' + word
            + '" AND sent_id IN (' + ', '.join(sent_ids) + ')')
    else:
        tokens = []
    positions = defaultdict(list)
    for sent_id, num in tokens:
        positions[sent_id].append(num)
    sent_list = [ShowSentence(sent_id, positions[sent_id], expand)
                 for sent_id in sorted(positions)]
    ShowSentence.empty()
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
Exemple #5
0
def exact_search(word, docs, flag, expand, page, per_page):
    """Return one page of sentences containing the first word of ``word``.

    Args:
        word: search string; only its first whitespace-separated word is used.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, restrict every query to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence``.
        page: 1-based page number.
        per_page: number of sentences per page.

    Returns:
        Tuple ``(jq, sent_list, word, docs_len, sent_num)``: the ``jquery``
        template instantiated for each sentence id, the ``ShowSentence``
        objects of the page ordered by sentence id, the word that was searched,
        and the numbers of distinct matching documents and sentences.  A blank
        ``word`` or an empty restricted subcorpus yields empty results.
    """
    pieces = word.split()
    if not pieces or (flag and not docs):
        # Blank query (``split()[0]`` would raise) or ``doc_id IN ()``
        # (invalid SQL): nothing can match, so skip the database entirely.
        return [], [], word, 0, 0
    word = pieces[0]
    db = Database()
    # NOTE(review): ``word`` is concatenated into the SQL text unescaped; this
    # should become a bound parameter once Database.execute is confirmed to
    # accept parameters.
    where = ' FROM `annotator_token` WHERE token="' + word + '" '
    doc_filter = 'AND doc_id IN (' + ','.join(docs) + ')' if flag else ''
    end = ';' if flag else ''
    docs_len = int(db.execute(
        'SELECT COUNT(DISTINCT doc_id)' + where + doc_filter + end)[0][0])
    sent_num = int(db.execute(
        'SELECT COUNT(DISTINCT sent_id)' + where + doc_filter + end)[0][0])
    page_req = ('SELECT DISTINCT sent_id' + where + doc_filter
                + ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))
    sent_ids = [str(row[0]) for row in db.execute(page_req)]
    if sent_ids:
        tokens = db.execute(
            'SELECT sent_id, num FROM `annotator_token` WHERE token="' + word
            + '" AND sent_id IN (' + ', '.join(sent_ids) + ')')
    else:
        tokens = []
    hits = defaultdict(list)
    for sent_id, num in tokens:
        hits[sent_id].append(num)
    sent_list = [ShowSentence(sent_id, hits[sent_id], expand)
                 for sent_id in sorted(hits)]
    ShowSentence.empty()
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
Exemple #6
0
def get_subcorpus(query):
    """Select the documents matching the subcorpus filters in ``query``.

    ``query`` is read with ``in``, ``get`` and ``getlist``; the keys used are
    ``rulec``, ``mode``, ``background``, ``gender``, ``date1``, ``date2`` and
    ``language[]``.

    Returns:
        Tuple ``(docs, sentences, words, flag)``: matching document ids as
        strings, the summed ``sentences`` and ``words`` columns of those
        documents, and ``flag`` which is true when at least one filter was
        added to the base query.
    """
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    clauses = [base]
    if u'rulec' in query:
        clauses.append('AND subcorpus="RULEC" ')
    # Single-choice filters: the value "any" means "do not filter".
    for key, column in ((u'mode', 'mode'),
                        (u'background', 'language_background'),
                        (u'gender', 'gender')):
        choice = query.get(key).encode('utf-8')
        if choice != u'any':
            clauses.append('AND ' + column + '="' + choice + '" ')
    # Date bounds: an empty string means "no bound".
    for key, comparison in ((u'date1', 'date1>='), (u'date2', 'date2<=')):
        bound = query.get(key)
        if bound != u'':
            clauses.append('AND ' + comparison + bound.encode('utf-8') + ' ')
    languages = query.getlist(u'language[]')
    if languages != []:
        natives = ['native="' + lang.encode('utf-8') + '"' for lang in languages]
        if len(natives) == 1:
            clauses.append('AND ' + natives[0])
        else:
            clauses.append('AND (' + ' OR '.join(natives) + ')')
    req = ''.join(clauses)
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    totals = db.execute('SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN (' + req + ')')
    flag = req != base
    return docs, totals[0][0], totals[0][1], flag
Exemple #7
0
def get_subcorpus(query):
    """Select the documents matching the subcorpus filters in ``query``.

    ``query`` is read with ``in``, ``get`` and ``getlist``; the keys used are
    ``checked``, ``annotated``, ``gender``, ``date1``, ``date2``, ``genre[]``,
    ``major[]`` and ``course[]``.  Multi-value filters are rendered by
    ``make_small_query``.

    Returns:
        Tuple ``(docs, sentences, words, flag)``: matching document ids as
        strings, the summed ``sentences`` and ``words`` columns of those
        documents, and ``flag`` which is true when at least one filter was
        added to the base query.
    """
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = base
    # Presence-only switches.
    for key, clause in ((u'checked', 'AND checked=True '),
                        (u'annotated', 'AND annotated=True ')):
        if key in query:
            req += clause
    gender = query.get(u'gender').encode('utf-8')
    if gender != u'any':
        req += 'AND gender="' + gender + '" '
    # Date bounds: an empty string means "no bound".
    for key, comparison in ((u'date1', 'AND date1>='), (u'date2', 'AND date2<=')):
        bound = query.get(key)
        if bound != u'':
            req += comparison + bound + ' '
    # Multi-value filters.
    for key, column in ((u'genre[]', 'genre'),
                        (u'major[]', 'major'),
                        (u'course[]', 'course')):
        chosen = query.getlist(key)
        if chosen != []:
            req += make_small_query(chosen, column)
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    totals = db.execute('SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN (' + req + ')')
    flag = req != base
    return docs, totals[0][0], totals[0][1], flag
Exemple #8
0
def insert_user_info(name, surname, city, user_age):
    """Insert one row into ``user_info`` and commit it."""
    statement = '''
    INSERT INTO user_info 
    (user_name, user_surname, user_city, user_age) 
    VALUES (%s, %s, %s, %s)
    '''
    values = (name, surname, city, user_age)
    db = Database()
    db.execute(statement, values)
    db.commit()
Exemple #9
0
def _any_of(column, values):
    """Return an ``AND`` clause matching ``column`` against any of ``values``."""
    options = [column + '="' + value.encode('utf-8') + '"' for value in values]
    if len(options) == 1:
        return 'AND ' + options[0] + ' '
    # The trailing space keeps the next clause from being glued to this one.
    return 'AND (' + ' OR '.join(options) + ') '


def get_subcorpus(query):
    """Select the documents matching the subcorpus filters in ``query``.

    ``query`` is read with ``in``, ``get`` and ``getlist``; the keys used are
    ``rulec``, ``mode``, ``background``, ``gender``, ``date1``, ``date2``,
    ``language[]``, ``generallevel[]`` and ``level[]``.

    Returns:
        Tuple ``(docs, sentences, words, flag)``: matching document ids as
        strings, the summed ``sentences`` and ``words`` columns of those
        documents, and ``flag`` which is true when the selection differs in
        size from the full ``Document`` table.
    """
    # NOTE(review): request values are concatenated into the SQL text
    # unescaped; switch to bound parameters once Database.execute is confirmed
    # to accept them.
    req = 'SELECT id FROM `annotator_document` WHERE 1 '
    if u'rulec' in query:
        req += 'AND subcorpus="RULEC" '
    # Single-choice filters: the value "any" means "do not filter".
    for key, column in ((u'mode', 'mode'),
                        (u'background', 'language_background'),
                        (u'gender', 'gender')):
        choice = query.get(key).encode('utf-8')
        if choice != u'any':
            req += 'AND ' + column + '="' + choice + '" '
    # Date bounds: an empty string means "no bound".
    for key, comparison in ((u'date1', 'date1>='), (u'date2', 'date2<=')):
        bound = query.get(key)
        if bound != u'':
            req += 'AND ' + comparison + bound.encode('utf-8') + ' '
    # Multi-value filters: any of the selected values may match.
    for key, column in ((u'language[]', 'native'),
                        (u'generallevel[]', 'general_level'),
                        (u'level[]', 'level')):
        chosen = query.getlist(key)
        if chosen:
            req += _any_of(column, chosen)
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    num_docs = Document.objects.count()
    totals = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN ('
        + req + ')')
    flag = num_docs != len(docs)
    return docs, totals[0][0], totals[0][1], flag
Exemple #10
0
def exact_full_search(word, docs, flag, expand, page, per_page):
    """Search for the space-separated words of ``word`` and page the result.

    Token positions of every word are collected per sentence and combined
    through ``SentBag``; the sentences it finalizes are wrapped in
    ``ShowSentence`` and sliced to the requested page.

    Returns:
        Tuple ``(jq, sent_list, word, d_num, sent_num)``: the ``jquery``
        template instantiated per sentence on the page, the page of
        ``ShowSentence`` objects, the original query, and the numbers of
        distinct documents and of sentences over all matches.
    """
    db = Database()
    terms = word.split(' ')
    term_count = len(terms)
    doc_filter = 'AND doc_id IN (' + ','.join(docs) + ')' if flag else ''
    bag = {}
    for position, term in enumerate(terms):
        rows = db.execute(
            'SELECT sent_id, num FROM `annotator_token` WHERE token="' + term + '" '
            + doc_filter)
        hits = defaultdict(list)
        if rows:
            for sent_id, num in rows:
                hits[sent_id].append(num)
        if bag:
            bag.update(hits, position, position)
        else:
            bag = SentBag(hits, term_count)
    matched = bag.finalize(term_count)
    sent_list = [ShowSentence(sent_id, matched[sent_id], expand)
                 for sent_id in sorted(matched)]
    ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(sent.doc_id for sent in sent_list))
    first = per_page * (page - 1)
    sent_list = sorted(sent_list, key=lambda sent: sent.id)[first:per_page * page]
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, d_num, sent_num
Exemple #11
0
def exact_full_search(word, docs, flag, expand, page, per_page):
    """Search for the space-separated words of ``word`` and page the result.

    For every word the matching ``(sent_id, num)`` pairs are grouped by
    sentence and fed to a ``SentBag``; its finalized content is turned into
    ``ShowSentence`` objects, counted, and cut down to one page.

    Returns:
        Tuple ``(jq, sent_list, word, d_num, sent_num)``: the ``jquery``
        template instantiated per sentence on the page, the page of
        ``ShowSentence`` objects, the original query, and the numbers of
        distinct documents and of sentences over all matches.
    """
    db = Database()
    query_words = word.split(' ')
    total = len(query_words)
    collected = {}
    for index, token in enumerate(query_words):
        sql = 'SELECT sent_id, num FROM `annotator_token` WHERE token="' + token + '" '
        if flag:
            sql += 'AND doc_id IN (' + ','.join(docs) + ')'
        found = db.execute(sql)
        by_sentence = defaultdict(list)
        for sent_id, num in (found if found else ()):
            by_sentence[sent_id].append(num)
        if not collected:
            collected = SentBag(by_sentence, total)
        else:
            collected.update(by_sentence, index, index)
    collected = collected.finalize(total)
    everything = [ShowSentence(key, collected[key], expand) for key in sorted(collected)]
    ShowSentence.empty()
    sent_num = len(everything)
    d_num = len(set(item.doc_id for item in everything))
    ordered = sorted(everything, key=lambda item: item.id)
    sent_list = ordered[per_page * (page - 1):per_page * page]
    jq = []
    for item in sent_list:
        jq.append(jquery.replace('***', str(item.id)))
    return jq, sent_list, word, d_num, sent_num
Exemple #12
0
def complex_search(age, city, f_surname):
    """Return friends living in ``city`` who are older than ``age``.

    Only friends attached to a registered user whose surname equals
    ``f_surname`` are considered.  The rows hold ``f_name``, ``f_surname``,
    ``f_age`` and ``f_city``.
    """
    statement = '''SELECT f_name, f_surname, f_age, f_city 
                        FROM friendship WHERE f_city = %s
                        AND friend_1 
                        IN (SELECT id FROM user_info WHERE user_surname = %s)
                        HAVING f_age > %s'''
    arguments = (city, f_surname, age)
    return Database().execute(statement, arguments)
Exemple #13
0
def insert_friend_info(user_name, user_surname, user_city, user_age, name, surname, city, age):
    """Store a friend record, linked to its registered user when one exists.

    The user is looked up by name, surname, city and age.  When a matching
    ``user_info`` row is found the friend is inserted with ``friend_1`` set to
    that row's id; otherwise the friend is inserted without an owner.  The
    transaction is committed in both cases.
    """
    db = Database()
    matches = db.execute('''
    SELECT id FROM user_info
    WHERE user_name = %s AND user_surname = %s AND user_city = %s AND user_age = %s
    ''', (user_name, user_surname, user_city, user_age))
    # Keep the try body to the lookup itself so that an IndexError raised
    # while inserting can never trigger the fallback insert as well.
    try:
        user_id = matches[0][0]
    except IndexError:
        # NOTE(review): every other statement names this column ``f_name``;
        # ``friend_name`` looks like a typo -- confirm against the schema.
        db.execute('''
            INSERT INTO friendship
            (friend_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s)
            ''', (name, surname, city, age))
    else:
        db.execute('''
            INSERT INTO friendship
            (friend_1, f_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s, %s)
            ''', (user_id, name, surname, city, age))
    db.commit()
Exemple #14
0
def get_orig_sent(doc_id, num):
    """Return the text of original sentence ``num`` of document ``doc_id``.

    Raises whatever indexing the first result row raises when the query
    returns nothing.
    """
    db = Database()
    query = ('SELECT text FROM `annotator_originalsentence` '
             'WHERE doc_id_id={} AND num={}').format(doc_id, num)
    first_row = db.execute(query)[0]
    return first_row[0]
Exemple #15
0
def view_all():
    """Return every distinct person found in ``user_info JOIN friendship``.

    Each joined row contributes two 4-item lists (columns 1-4 and 7-10);
    duplicates are dropped while the first-seen order is kept.
    """
    rows = Database().execute('SELECT * FROM user_info JOIN friendship', 0)
    people = []
    for row in rows:
        pair = ([row[1], row[2], row[3], row[4]],
                [row[7], row[8], row[9], row[10]])
        for person in pair:
            if person not in people:
                people.append(person)
    return people
Exemple #16
0
def get_subcorpus(query):
    """Return all document ids together with corpus-wide totals.

    Every request-driven filter is disabled in this version, so ``query`` is
    not read and the unfiltered base query is always used.

    Returns:
        Tuple ``(docs, sentences, words, flag)``: all document ids as strings,
        the summed ``sentences`` and ``words`` columns, and ``flag``, which is
        true only when the query differs from the unfiltered base query.
    """
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = base
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    totals = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN ('
        + req + ')')
    sentences, words = totals[0][0], totals[0][1]
    flag = req != base
    return docs, sentences, words, flag
Exemple #17
0
def download_file(request, doc_id, doc_type):
    """Build a downloadable export of one document.

    Args:
        request: the incoming request; not read by this function.
        doc_id: id of the document; must be convertible with ``int``.
        doc_type: ``'ann'`` exports the annotations as tab-separated rows,
            ``'text'`` exports the sentence texts encoded as cp1251, and any
            other value exports the token table as tab-separated rows.

    Returns:
        An ``HttpResponse`` carrying the export and a ``Content-Disposition``
        header naming the file.

    Raises:
        ValueError: if ``doc_id`` is not an integer.
    """
    # ``doc_id`` ends up in SQL text and in a response header, so accept
    # nothing but an integer before using it anywhere.
    doc_key = int(doc_id)
    db = Database()
    if doc_type == 'ann':
        req = ("SELECT `username`, `data`, `tag`, `start`, `end` FROM `annotator_annotation` "
               "LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id "
               "WHERE `document_id` in (SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%d)" % doc_key)
        header = u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n'
        lines = []
        for row in db.execute(req):
            # The ``data`` column holds JSON with the quoted error and its correction.
            data = json.loads(row[1])
            lines.append('\t'.join([str(row[0]), data['quote'], data['corrs'], row[2], str(row[3]), str(row[4])]) + '\r\n')
        text = header + u''.join(lines)
        response = HttpResponse(text, content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = 'attachment; filename="annotation_text_%d.csv"' % doc_key
        return response
    elif doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%d" % doc_key
        text = ' '.join(h.unescape(i[0]).encode('cp1251') for i in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        response['Content-Disposition'] = 'filename="text_%d.txt"' % doc_key
        return response
    else:
        req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%d" % doc_key
        rows = u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n' + u'\r\n'.join(u'\t'.join([str(row[2]), row[0], str(row[1]), '', '', '']) for row in db.execute(req))
        response = HttpResponse(rows, content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="tokens_text_%d.txt"' % doc_key
        return response
Exemple #18
0
def orig_exact_search(word, docs, flag, expand, page, per_page):
    """Search the original (uncorrected) sentences with a REGEXP match.

    Args:
        word: query; it is split on single spaces and every part is used as a
            REGEXP pattern against ``annotator_originalsentence.text``.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, restrict the search to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence1``.
        page, per_page: accepted but not used; no paging is applied here.

    Returns:
        Tuple ``(jq, sent_list, word, d_num, sent_num)``.  ``sent_list`` is a
        dict mapping each result row to its ``ShowSentence1`` object, ``jq``
        holds the ``jquery`` template instantiated with each row's ``num``,
        ``d_num`` counts distinct first columns of the rows and ``sent_num``
        counts the rows.
    """
    db = Database()
    jq = []
    for pattern in word.split(' '):
        req4 = 'SELECT doc_id_id, num, text FROM `annotator_originalsentence` WHERE text REGEXP "' + pattern + '" '
        if flag:
            req4 += 'AND doc_id_id IN (' + ','.join(docs) + ')'
        rows = db.execute(req4)
        # Debug trace kept from the original; the context manager closes the
        # file even if writing fails.
        with open('l.txt', 'a') as log:
            log.write('\n')
            log.write(str(rows))
        # NOTE(review): the dict is reset for every word, so only the matches
        # of the last word are returned -- confirm this is intended.
        sent_list = {}
        if rows:
            for sent in rows:
                sent_list[sent] = ShowSentence1(sent[0], sent[1], expand)
            ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(sent[0] for sent in sent_list))
    for sent in sent_list:
        jq.append(jquery.replace('***', str(sent[1])))
    return jq, sent_list, word, d_num, sent_num
Exemple #19
0
def collect_data(arr):
    """Run the lexico-grammatical search described by ``arr``.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag)``.  When only
    ``err`` is given, annotations are searched by tag and the rows hold
    ``document_id, start, end``; otherwise tokens are searched by lemma,
    ``lex``, the clause produced by ``parse_gram`` and, optionally, error tags
    covering the token, and the rows hold ``sent_id, num``.  When ``flag`` is
    true the search is limited to the document ids in ``docs``.

    Returns:
        ``[]`` when all four criteria are empty strings, otherwise whatever
        ``Database.execute`` returns for the assembled query.
    """
    word, lex, gram, err, docs, flag = arr
    if not any(field != "" for field in (word, lex, gram, err)):
        return []

    def error_tags():
        # Split the tag expression on ",", ":,", "|", "(" and ")".
        pieces = re.split(':?,|\\||\\(|\\)', err.lower())
        return [piece for piece in pieces if piece != '']

    indent = '\n        '
    only_error = [word, lex, gram] == ["", "", ""] and err != ''
    if only_error:
        req = '''SELECT DISTINCT document_id, start, end FROM annotator_annotation
                 LEFT JOIN annotator_sentence
                 ON annotator_annotation.document_id = annotator_sentence.id WHERE 1 '''
        for tag in error_tags():
            req += 'AND tag REGEXP "[[:<:]]' + tag + '[[:>:]]" '
        if flag:
            req += 'AND doc_id_id IN (' + ','.join(docs) + ');'
    else:
        req = ('SELECT DISTINCT sent_id, num FROM  annotator_token' + indent
               + 'LEFT JOIN annotator_morphology' + indent
               + 'ON annotator_token.id = annotator_morphology.token_id' + indent)
        if err != '':
            req += ('LEFT JOIN annotator_annotation' + indent
                    + 'ON annotator_token.sent_id = annotator_annotation.document_id'
                    + indent)
        req += 'WHERE 1 '
        if err != '':
            for tag in error_tags():
                req += 'AND tag LIKE "%' + tag + '%" '
            # Keep only tokens lying inside the annotated span.
            req += 'AND num>= annotator_annotation.start AND num <= annotator_annotation.end '
        if word != '':
            req += 'AND lem="' + word + '" '
        if lex != '':
            req += 'AND lex LIKE "%' + lex + '%" '
        if gram != '':
            req += parse_gram(gram)
        if flag:
            req += 'AND doc_id IN (' + ','.join(docs) + ');'
    return Database().execute(req)
Exemple #20
0
def make_tables():
    """Drop and recreate the ``word_info``, ``examples`` and ``updaters`` tables."""
    schemas = (
        ("word_info",
         """CREATE TABLE word_info
                    (word_id INTEGER PRIMARY KEY AUTOINCREMENT,
                    word TEXT, 
                    definition TEXT,
                    updater TEXT);
                           """),
        ("examples",
         """CREATE TABLE examples
                    (word_id INTEGER, 
                    example TEXT);
                            """),
        ("updaters",
         """CREATE TABLE updaters
                    (author_id INTEGER PRIMARY KEY AUTOINCREMENT,
                    city TEXT,
                    date DATE);
                            """),
    )
    db = Database()
    for table, ddl in schemas:
        # Start from a clean slate: remove the old table before creating it.
        db.execute("DROP TABLE IF EXISTS %s;" % table, 0)
        db.execute(ddl, 0)
    db.commit()
Exemple #21
0
def collect_full_data(arr):
    """Run the lexico-grammatical search described by ``arr``.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page, per_page)``;
    ``page`` and ``per_page`` are accepted but not used.  ``bincode`` turns
    the four criteria into a code such as ``'0101'`` that selects which
    conditions are added to the query:

    * ``'0000'`` -- nothing to search for;
    * ``'0001'`` -- annotations only: rows hold ``document_id, start, end``;
    * anything else -- token search: rows hold ``sent_id, num``, filtered by
      lemma (``word``), ``parse_lex(lex)``, ``parse_gram(gram, 'gram')`` and,
      when an error tag is given, ``parse_gram(err, 'tag')`` on annotations
      whose span covers the token.

    When ``flag`` is true the search is limited to the document ids in
    ``docs``.

    Returns:
        Tuple ``(rows, 0, 0)``; the two counters are always zero because no
        count queries are run.
    """
    word, lex, gram, err, docs, flag, page, per_page = arr
    if flag and not docs:
        # ``IN ()`` is a SQL syntax error; an empty subcorpus cannot match.
        return [], 0, 0
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000':
        return [], 0, 0
    # NOTE(review): ``word`` is interpolated into the SQL text unescaped;
    # switch to bound parameters once Database.execute is confirmed to accept
    # them.
    if s == '0001':
        # ``WHERE 1`` is required because parse_gram's clause is appended
        # after ``WHERE 1`` everywhere else, i.e. it does not stand alone.
        req = ('SELECT DISTINCT document_id, start, end'
               ' FROM annotator_annotation'
               ' LEFT JOIN annotator_sentence'
               ' ON annotator_annotation.document_id = annotator_sentence.id'
               ' WHERE 1 ' + parse_gram(err, 'tag'))
        doc_column = 'doc_id_id'
    else:
        if len(s) == 4 and set(s) <= set('01'):
            has_word, has_lex, has_gram, has_err = [bit == '1' for bit in s]
        else:
            # Any unexpected code is treated as "all criteria given".
            has_word = has_lex = has_gram = has_err = True
        join_on = ' ON annotator_token.id = annotator_morphology.token_id'
        # The join direction of each combination is kept as it always was:
        # token-first for error searches and for '1110', morphology-first
        # otherwise.
        if has_err or s == '1110':
            source = ' FROM annotator_token LEFT JOIN annotator_morphology' + join_on
        else:
            source = ' FROM annotator_morphology LEFT JOIN annotator_token' + join_on
        conditions = []
        if has_word:
            conditions.append('AND lem="%s"' % word)
        if has_err:
            source += (' LEFT JOIN annotator_annotation'
                       ' ON annotator_token.sent_id = annotator_annotation.document_id')
            conditions.append(parse_gram(err, 'tag'))
            # Keep only tokens lying inside the annotated span.
            conditions.append('AND num>= annotator_annotation.start'
                              ' AND num <= annotator_annotation.end')
        if has_lex:
            conditions.append(parse_lex(lex))
        if has_gram:
            conditions.append(parse_gram(gram, 'gram'))
        req = ('SELECT DISTINCT sent_id, num' + source
               + ' WHERE 1 ' + ' '.join(conditions) + ' ')
        doc_column = 'doc_id'
    if flag:
        req += 'AND ' + doc_column + ' IN (' + ','.join(docs) + ')'
    db = Database()
    rows = db.execute(req)
    return rows, 0, 0
Exemple #22
0
def view_registered():
    """Return every row of the ``user_info`` table.

    The result is passed through unchanged from ``Database.execute``.
    """
    # NOTE(review): the second argument (0) is forwarded as-is; its meaning
    # is defined by Database.execute, which is not visible here.
    query = '''SELECT * FROM user_info'''
    return Database().execute(query, 0)
Exemple #23
0
def collect_data(arr):
    """Run one page of a corpus search and count the total number of hits.

    ``arr`` unpacks to ``(word, lex, gram, err, comment, docs, flag, page,
    per_page)``; ``comment`` is accepted but not used by the search.
    ``bincode(word, lex, gram, err)`` yields a code such as ``'1010'``; this
    function treats its four characters as on/off switches for the word,
    lex, gram and err criteria, in that order.  When ``flag`` is truthy the
    search is limited to the document ids in ``docs`` (strings, because they
    are joined with ``','``).  ``page`` is 1-based.

    Returns ``(rows, sent_num, d_num)``:

    * ``rows`` -- result of the hit query, restricted to the sentences of
      the requested page; it selects ``sent_id, num`` (or
      ``document_id, start, end`` for an err-only search);
    * ``sent_num`` -- count of distinct matching sentences over all pages;
    * ``d_num`` -- count of distinct matching documents over all pages.

    ``([], 0, 0)`` is returned when no criterion is set, when ``flag`` is
    truthy with an empty ``docs``, or when the requested page has no
    sentences.
    """
    db = Database()
    word, lex, gram, err, comment, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0

    # NOTE(review): ``word`` and ``docs`` are interpolated straight into the
    # SQL text.  If they can carry user input this is an injection risk;
    # switch to parameterised queries if Database.execute supports them.
    if s == '0001':
        # Err-only search: query the annotations directly.  Sentence and
        # document ids use different column names on this path.
        sent_col, doc_col, hit_cols = 'document_id', 'doc_id_id', 'document_id, start, end'
        tables = ('annotator_annotation'
                  ' LEFT JOIN annotator_sentence'
                  ' ON annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        sent_col, doc_col, hit_cols = 'sent_id', 'doc_id', 'sent_id, num'
        partial_codes = ('0010', '0011', '0100', '0101', '0110', '0111',
                         '1000', '1001', '1010', '1011', '1100', '1101',
                         '1110')
        # Any code not listed is handled like '1111' (all four criteria).
        code = s if s in partial_codes else '1111'
        has_word, has_lex, has_gram, has_err = [bit == '1' for bit in code]

        token_first = ('annotator_token'
                       ' LEFT JOIN annotator_morphology'
                       ' ON annotator_token.id = annotator_morphology.token_id')
        if has_err:
            tables = token_first + (
                ' LEFT JOIN annotator_annotation'
                ' ON annotator_token.sent_id = annotator_annotation.document_id')
        elif code == '1110':
            # This combination joins from the token side, unlike the other
            # searches without an err criterion.
            tables = token_first
        else:
            tables = ('annotator_morphology'
                      ' LEFT JOIN annotator_token'
                      ' ON annotator_token.id = annotator_morphology.token_id')

        # The token must lie inside the annotated span.
        in_span = ('AND num>= annotator_annotation.start'
                   ' AND num <= annotator_annotation.end')
        conditions = []
        # Condition order is kept deliberately: with a word the span test
        # precedes the tag filter, without a word it follows it.
        if has_word:
            conditions.append('AND lem="%s"' % (word,))
            if has_err:
                conditions.append(in_span)
                conditions.append(parse_gram(err, 'tag'))
        elif has_err:
            conditions.append(parse_gram(err, 'tag'))
            conditions.append(in_span)
        if has_lex:
            conditions.append(parse_lex(lex))
        if has_gram:
            conditions.append(parse_gram(gram, 'gram'))

    req_template = ' FROM ' + tables + ' WHERE 1 ' + ' '.join(conditions)
    if flag:
        req_template += ' AND ' + doc_col + ' IN (' + ','.join(docs) + ')'

    n_req = 'SELECT COUNT(DISTINCT ' + sent_col + ')' + req_template
    d_req = 'SELECT COUNT(DISTINCT ' + doc_col + ')' + req_template
    req = 'SELECT DISTINCT ' + hit_cols + req_template
    req1 = 'SELECT DISTINCT ' + sent_col + req_template
    req1 += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)

    # First fetch the sentence ids of the requested page, then every hit
    # inside those sentences.
    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req1)]) + ')'
    if sentences == '()':
        return [], 0, 0
    req += ' AND ' + sent_col + ' IN ' + sentences
    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num
Exemple #24
0
def collect_full_data(arr):
    """Run a corpus search without pagination and return every hit.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page,
    per_page)``; ``page`` and ``per_page`` are accepted but not used.
    ``bincode(word, lex, gram, err)`` yields a code such as ``'1010'``; this
    function treats its four characters as on/off switches for the word,
    lex, gram and err criteria, in that order.  When ``flag`` is truthy the
    search is limited to the document ids in ``docs`` (strings, because they
    are joined with ``','``).

    Returns ``(rows, 0, 0)`` where ``rows`` is the result of the hit query,
    which selects ``sent_id, num`` (or ``document_id, start, end`` for an
    err-only search).  The two counters are always ``0``.  ``([], 0, 0)`` is
    returned when no criterion is set or when ``flag`` is truthy with an
    empty ``docs``.
    """
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0

    # NOTE(review): ``word`` and ``docs`` are interpolated straight into the
    # SQL text.  If they can carry user input this is an injection risk;
    # switch to parameterised queries if Database.execute supports them.
    if s == '0001':
        # Err-only search: query the annotations directly.  The document
        # id uses a different column name on this path.
        doc_col, hit_cols = 'doc_id_id', 'document_id, start, end'
        tables = ('annotator_annotation'
                  ' LEFT JOIN annotator_sentence'
                  ' ON annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        doc_col, hit_cols = 'doc_id', 'sent_id, num'
        partial_codes = ('0010', '0011', '0100', '0101', '0110', '0111',
                         '1000', '1001', '1010', '1011', '1100', '1101',
                         '1110')
        # Any code not listed is handled like '1111' (all four criteria).
        code = s if s in partial_codes else '1111'
        has_word, has_lex, has_gram, has_err = [bit == '1' for bit in code]

        token_first = ('annotator_token'
                       ' LEFT JOIN annotator_morphology'
                       ' ON annotator_token.id = annotator_morphology.token_id')
        if has_err:
            tables = token_first + (
                ' LEFT JOIN annotator_annotation'
                ' ON annotator_token.sent_id = annotator_annotation.document_id')
        elif code == '1110':
            # This combination joins from the token side, unlike the other
            # searches without an err criterion.
            tables = token_first
        else:
            tables = ('annotator_morphology'
                      ' LEFT JOIN annotator_token'
                      ' ON annotator_token.id = annotator_morphology.token_id')

        # The token must lie inside the annotated span.
        in_span = ('AND num>= annotator_annotation.start'
                   ' AND num <= annotator_annotation.end')
        conditions = []
        # Condition order is kept deliberately: with a word the span test
        # precedes the tag filter, without a word it follows it.
        if has_word:
            conditions.append('AND lem="%s"' % (word,))
            if has_err:
                conditions.append(in_span)
                conditions.append(parse_gram(err, 'tag'))
        elif has_err:
            conditions.append(parse_gram(err, 'tag'))
            conditions.append(in_span)
        if has_lex:
            conditions.append(parse_lex(lex))
        if has_gram:
            conditions.append(parse_gram(gram, 'gram'))

    req = ('SELECT DISTINCT ' + hit_cols
           + ' FROM ' + tables + ' WHERE 1 ' + ' '.join(conditions))
    if flag:
        req += ' AND ' + doc_col + ' IN (' + ','.join(docs) + ')'
    rows = db.execute(req)
    return rows, 0, 0
Exemple #25
0
def collect_data(arr):
    """Run one page of a corpus search and count the total number of hits.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page,
    per_page)``.  ``bincode(word, lex, gram, err)`` yields a code such as
    ``'1010'``; this function treats its four characters as on/off switches
    for the word, lex, gram and err criteria, in that order.  When ``flag``
    is truthy the search is limited to the document ids in ``docs``
    (strings, because they are joined with ``','``).  ``page`` is 1-based.

    Returns ``(rows, sent_num, d_num)``:

    * ``rows`` -- result of the hit query, restricted to the sentences of
      the requested page; it selects ``sent_id, num`` (or
      ``document_id, start, end`` for an err-only search);
    * ``sent_num`` -- count of distinct matching sentences over all pages;
    * ``d_num`` -- count of distinct matching documents over all pages.

    ``([], 0, 0)`` is returned when no criterion is set, when ``flag`` is
    truthy with an empty ``docs``, or when the requested page has no
    sentences, so every path yields a 3-tuple.

    Side effect: the three final queries are written to a fixed debug file
    before they are executed.
    """
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    # An empty ``docs`` with ``flag`` set would generate ``IN ()``, which is
    # not valid SQL, so it is answered like an empty query.
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0

    # NOTE(review): ``word`` and ``docs`` are interpolated straight into the
    # SQL text.  If they can carry user input this is an injection risk;
    # switch to parameterised queries if Database.execute supports them.
    if s == '0001':
        # Err-only search: query the annotations directly.  Sentence and
        # document ids use different column names on this path.
        sent_col, doc_col, hit_cols = 'document_id', 'doc_id_id', 'document_id, start, end'
        tables = ('annotator_annotation'
                  ' LEFT JOIN annotator_sentence'
                  ' ON annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        sent_col, doc_col, hit_cols = 'sent_id', 'doc_id', 'sent_id, num'
        partial_codes = ('0010', '0011', '0100', '0101', '0110', '0111',
                         '1000', '1001', '1010', '1011', '1100', '1101',
                         '1110')
        # Any code not listed is handled like '1111' (all four criteria).
        code = s if s in partial_codes else '1111'
        has_word, has_lex, has_gram, has_err = [bit == '1' for bit in code]

        token_first = ('annotator_token'
                       ' LEFT JOIN annotator_morphology'
                       ' ON annotator_token.id = annotator_morphology.token_id')
        if has_err:
            tables = token_first + (
                ' LEFT JOIN annotator_annotation'
                ' ON annotator_token.sent_id = annotator_annotation.document_id')
        elif code == '1110':
            # This combination joins from the token side, unlike the other
            # searches without an err criterion.
            tables = token_first
        else:
            tables = ('annotator_morphology'
                      ' LEFT JOIN annotator_token'
                      ' ON annotator_token.id = annotator_morphology.token_id')

        # The token must lie inside the annotated span.
        in_span = ('AND num>= annotator_annotation.start'
                   ' AND num <= annotator_annotation.end')
        conditions = []
        # Condition order is kept deliberately: with a word the span test
        # precedes the tag filter, without a word it follows it.
        if has_word:
            conditions.append('AND lem="%s"' % (word,))
            if has_err:
                conditions.append(in_span)
                conditions.append(parse_gram(err, 'tag'))
        elif has_err:
            conditions.append(parse_gram(err, 'tag'))
            conditions.append(in_span)
        if has_lex:
            conditions.append(parse_lex(lex))
        if has_gram:
            conditions.append(parse_gram(gram, 'gram'))

    req_template = ' FROM ' + tables + ' WHERE 1 ' + ' '.join(conditions)
    if flag:
        req_template += ' AND ' + doc_col + ' IN (' + ','.join(docs) + ')'

    n_req = 'SELECT COUNT(DISTINCT ' + sent_col + ')' + req_template
    d_req = 'SELECT COUNT(DISTINCT ' + doc_col + ')' + req_template
    req = 'SELECT DISTINCT ' + hit_cols + req_template
    req1 = 'SELECT DISTINCT ' + sent_col + req_template
    req1 += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)

    # First fetch the sentence ids of the requested page, then every hit
    # inside those sentences.
    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req1)]) + ')'
    if sentences == '()':
        return [], 0, 0
    req += ' AND ' + sent_col + ' IN ' + sentences

    # NOTE(review): debug dump to a hard-coded absolute path; it raises if
    # the directory does not exist.  Consider removing it or routing it
    # through logging.  The context manager closes the handle even when the
    # write fails.
    with codecs.open('/home/elmira/learner_corpus/tempfiles/s.txt', 'w') as f:
        f.write(req + '\r\n' + n_req + '\r\n' + d_req)

    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num