Beispiel #1
0
def exact_search(word, docs, flag, expand, page, per_page):
    """Return one page of sentences that contain an exact token.

    Only the first whitespace-separated piece of ``word`` is searched for.

    Args:
        word: Raw query string.
        docs: Document ids (as strings) used to restrict the search; only
            consulted when ``flag`` is true.
        flag: When true, restrict matches to the documents in ``docs``.
        expand: Passed through unchanged to ``ShowSentence``.
        page: Page number; page 1 starts at offset 0.
        per_page: Number of distinct sentences fetched per page.

    Returns:
        A tuple ``(jq, sent_list, word, docs_len, sent_num)``: the per-sentence
        ``jquery`` snippets, the ``ShowSentence`` objects of this page, the
        token that was searched, the count of distinct matching documents and
        the count of distinct matching sentences.
    """
    pieces = word.split()
    if not pieces or (flag and not docs):
        # A blank query has no token to look up, and an empty document
        # filter would be rendered as the invalid SQL "doc_id IN ()".
        return [], [], word, 0, 0
    word = pieces[0]
    db = Database()
    # NOTE(review): the token and the ids are spliced into the SQL text
    # unescaped; use bound parameters if Database.execute supports them.
    condition = ' FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        condition += 'AND doc_id IN (' + ','.join(docs) + ')'
    docs_len = int(db.execute('SELECT COUNT(DISTINCT doc_id)' + condition)[0][0])
    sent_num = int(db.execute('SELECT COUNT(DISTINCT sent_id)' + condition)[0][0])
    page_req = 'SELECT DISTINCT sent_id' + condition
    page_req += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)
    sent_ids = [str(row[0]) for row in db.execute(page_req)]
    hits = defaultdict(list)
    if sent_ids:
        # Collect every position of the token inside the sentences of this page.
        token_req = ('SELECT sent_id, num FROM `annotator_token` WHERE token="'
                     + word + '" AND sent_id IN (' + ', '.join(sent_ids) + ')')
        for sent_id, num in db.execute(token_req):
            hits[sent_id].append(num)
    sent_list = [ShowSentence(i, hits[i], expand) for i in sorted(hits)]
    ShowSentence.empty()
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
Beispiel #2
0
def exact_full_search(word, docs, flag, expand, page, per_page):
    """Search for every space-separated token of ``word`` and page the hits.

    Each token is looked up on its own; the per-token hits are combined by a
    ``SentBag`` (its ``update``/``finalize`` logic lives outside this block).
    Paging is applied in Python after all matching sentences were built.

    Args:
        word: Query string; split on single spaces into tokens.
        docs: Document ids (as strings), consulted only when ``flag`` is true.
        flag: When true, restrict matches to the documents in ``docs``.
        expand: Passed through unchanged to ``ShowSentence``.
        page: Page number; page 1 is the first ``per_page`` sentences.
        per_page: Number of sentences per page.

    Returns:
        A tuple ``(jq, sent_list, word, d_num, sent_num)`` where ``d_num`` and
        ``sent_num`` count documents and sentences over all pages.
    """
    if flag and not docs:
        # An empty document filter would be rendered as the invalid SQL
        # "doc_id IN ()"; an empty subcorpus simply has no matches.
        return [], [], word, 0, 0
    db = Database()
    words = word.split(' ')
    bag = {}
    for position, token in enumerate(words):
        # NOTE(review): token and ids are spliced into the SQL text unescaped.
        req = 'SELECT sent_id, num FROM `annotator_token` WHERE token="' + token + '" '
        if flag:
            req += 'AND doc_id IN (' + ','.join(docs) + ')'
        hits = defaultdict(list)
        for sent_id, num in db.execute(req) or ():
            hits[sent_id].append(num)
        if not bag:
            bag = SentBag(hits, len(words))
        else:
            bag.update(hits, position, position)
    matches = bag.finalize(len(words))
    sent_list = [ShowSentence(i, matches[i], expand) for i in sorted(matches)]
    ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(sent.doc_id for sent in sent_list))
    first = per_page * (page - 1)
    sent_list = sorted(sent_list, key=lambda sent: sent.id)[first:per_page * page]
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, d_num, sent_num
Beispiel #3
0
def get_subcorpus(query):
    """Select the documents matching the filters carried by ``query``.

    ``query`` is read with ``get``/``getlist``/``in``. The fields ``mode``,
    ``background`` and ``gender`` are skipped when they equal ``any``, the
    date bounds when they are empty, and ``language[]`` when the list is empty.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: the matching document ids
        as strings, the summed ``sentences`` and ``words`` columns of those
        documents, and ``flag`` which is true when at least one filter was
        added to the base query.
    """
    # The base query does not exclude any subcorpus ("hidden" included).
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = base
    if u'rulec' in query:
        req += 'AND subcorpus="RULEC" '
    equality_filters = (
        (u'mode', 'mode'),
        (u'background', 'language_background'),
        (u'gender', 'gender'),
    )
    for field, column in equality_filters:
        value = query.get(field).encode('utf-8')
        if value != u'any':
            req += 'AND ' + column + '="' + value + '" '
    for field, comparison in ((u'date1', 'date1>='), (u'date2', 'date2<=')):
        bound = query.get(field)
        if bound != u'':
            req += 'AND ' + comparison + bound.encode('utf-8') + ' '
    language = query.getlist(u'language[]')
    if language != []:
        alternatives = ['native="' + lang.encode('utf-8') + '"' for lang in language]
        if len(alternatives) == 1:
            req += 'AND ' + alternatives[0]
        else:
            req += 'AND (' + ' OR '.join(alternatives) + ')'
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    totals = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN (' + req + ')'
    )[0]
    # Any appended filter makes the request differ from the bare base query.
    flag = req != base
    return docs, totals[0], totals[1], flag
Beispiel #4
0
def exact_search(word, docs, flag, expand, page, per_page):
    """Return one page of sentences that contain the exact token ``word``.

    Args:
        word: Token to look up (used as given).
        docs: Document ids (as strings), consulted only when ``flag`` is true.
        flag: When true, restrict matches to the documents in ``docs``.
        expand: Passed through unchanged to ``ShowSentence``.
        page: Page number; page 1 starts at offset 0.
        per_page: Number of distinct sentences fetched per page.

    Returns:
        A tuple ``(jq, sent_list, word, docs_len, sent_num)``: the per-sentence
        ``jquery`` snippets, the ``ShowSentence`` objects of this page, the
        token, the count of distinct matching documents and the count of
        distinct matching sentences.
    """
    if flag and not docs:
        # An empty document filter would be rendered as the invalid SQL
        # "doc_id IN ()"; an empty subcorpus simply has no matches.
        return [], [], word, 0, 0
    db = Database()
    # NOTE(review): the token and the ids are spliced into the SQL text
    # unescaped; use bound parameters if Database.execute supports them.
    condition = ' FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        condition += 'AND doc_id IN (' + ','.join(docs) + ')'
    docs_len = int(db.execute('SELECT COUNT(DISTINCT doc_id)' + condition)[0][0])
    sent_num = int(db.execute('SELECT COUNT(DISTINCT sent_id)' + condition)[0][0])
    page_req = 'SELECT DISTINCT sent_id' + condition
    page_req += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)
    sent_ids = [str(row[0]) for row in db.execute(page_req)]
    hits = defaultdict(list)
    if sent_ids:
        # Without this guard a page with no matches ran "sent_id IN ()",
        # which is a SQL syntax error.
        token_req = ('SELECT sent_id, num FROM `annotator_token` WHERE token="'
                     + word + '" AND sent_id IN (' + ', '.join(sent_ids) + ')')
        for sent_id, num in db.execute(token_req):
            hits[sent_id].append(num)
    sent_list = [ShowSentence(i, hits[i], expand) for i in hits]
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
Beispiel #5
0
def get_subcorpus(query):
    """Select the documents matching the filters carried by ``query``.

    Presence of ``checked`` / ``annotated`` adds a boolean condition,
    ``gender`` is skipped when it equals ``any``, the date bounds when they
    are empty, and the ``genre[]``, ``major[]`` and ``course[]`` lists when
    they are empty (their clauses come from ``make_small_query``).

    Returns:
        A tuple ``(docs, sentences, words, flag)``: the matching document ids
        as strings, the summed ``sentences`` and ``words`` columns of those
        documents, and ``flag`` which is true when at least one filter was
        added to the base query.
    """
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = base
    presence_filters = (
        (u'checked', 'AND checked=True '),
        (u'annotated', 'AND annotated=True '),
    )
    for key, clause in presence_filters:
        if key in query:
            req += clause
    gender = query.get(u'gender').encode('utf-8')
    if gender != u'any':
        req += 'AND gender="' + gender + '" '
    for key, comparison in ((u'date1', 'AND date1>='), (u'date2', 'AND date2<=')):
        bound = query.get(key)
        if bound != u'':
            req += comparison + bound + ' '
    list_filters = (
        (u'genre[]', 'genre'),
        (u'major[]', 'major'),
        (u'course[]', 'course'),
    )
    for key, column in list_filters:
        chosen = query.getlist(key)
        if chosen != []:
            req += make_small_query(chosen, column)
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    totals = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN (' + req + ')'
    )[0]
    # Any appended filter makes the request differ from the bare base query.
    flag = req != base
    return docs, totals[0], totals[1], flag
 async def startup(self):
     """Open the GDAX and POLO databases and run their migrations.

     Both ``Database`` objects are created with ``migrate=False``; the
     migrations are then awaited explicitly, GDAX first.
     """
     self.db = Database(DATABASE['GDAX'], migrate=False)
     self.polo_db = Database(DATABASE['POLO'], migrate=False)
     self.migrate = True
     if not self.migrate:
         return
     for database in (self.db, self.polo_db):
         await database.migrate()
Beispiel #7
0
def download_file(request, doc_id, doc_type):
    """Build a download response for one document in the requested format.

    Args:
        request: Not used inside this function.
        doc_id: Document id; interpolated into the SQL text and the file name.
        doc_type: ``'ann'`` for the annotation table, ``u'text'`` for the
            plain sentence text, anything else for the token table.

    Returns:
        An ``HttpResponse`` with a ``Content-Disposition`` header set.
    """
    # NOTE(review): doc_id is formatted straight into every query below;
    # confirm the caller guarantees it is numeric.
    db = Database()
    if doc_type == 'ann':
        # Annotations (with the owner's username) of every sentence of the document.
        req = "SELECT `username`, `data`, `tag`, `start`, `end` FROM `annotator_annotation` LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id WHERE `document_id` in (SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%s)" % doc_id
        # Header row (Russian): annotator, error, correction, tag,
        # error start and error end (word numbers from the sentence start).
        text = u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n'
        rows = db.execute(req)
        for row in rows:
            # The `data` column holds JSON with 'quote' and 'corrs' entries.
            data = json.loads(row[1])
            text += '\t'.join([
                str(row[0]), data['quote'], data['corrs'], row[2],
                str(row[3]),
                str(row[4])
            ]) + '\r\n'
        response = HttpResponse(text, content_type='text/csv; charset=utf-8')
        response[
            'Content-Disposition'] = 'attachment; filename="annotation_text_%s.csv"' % doc_id
        return response
    elif doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%s" % doc_id
        # Sentences are unescaped via `h` (defined outside this block),
        # encoded as cp1251 and joined with single spaces.
        text = ' '.join(
            h.unescape(i[0]).encode('cp1251') for i in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        # Unlike the other branches, no "attachment;" disposition type is set here.
        response['Content-Disposition'] = 'filename="text_%s.txt"' % doc_id
        return response
    else:
        req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%s" % doc_id
        # Header row (Russian): sentence id in the database, word, word number
        # in the sentence, tags, correction, annotator; the last three columns
        # are written empty for every token.
        rows = u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n' + u'\r\n'.join(
            u'\t'.join([str(row[2]), row[0],
                        str(row[1]), '', '', '']) for row in db.execute(req))
        response = HttpResponse(rows, content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="tokens_text_%s.txt"' % doc_id
        return response
Beispiel #8
0
def exact_full_search(word, docs, flag, expand, page, per_page):
    """Search for every space-separated token of ``word`` and page the hits.

    Each token is looked up on its own; the per-token hits are combined by a
    ``SentBag`` (its ``update``/``finalize`` logic lives outside this block).
    Paging is applied in Python after all matching sentences were built.

    Args:
        word: Query string; split on single spaces into tokens.
        docs: Document ids (as strings), consulted only when ``flag`` is true.
        flag: When true, restrict matches to the documents in ``docs``.
        expand: Passed through unchanged to ``ShowSentence``.
        page: Page number; page 1 is the first ``per_page`` sentences.
        per_page: Number of sentences per page.

    Returns:
        A tuple ``(jq, sent_list, word, d_num, sent_num)`` where ``d_num`` and
        ``sent_num`` count documents and sentences over all pages.
    """
    if flag and not docs:
        # An empty document filter would be rendered as the invalid SQL
        # "doc_id IN ()"; an empty subcorpus simply has no matches.
        return [], [], word, 0, 0
    db = Database()
    words = word.split(' ')
    total = len(words)
    bag = {}
    for position, token in enumerate(words):
        # NOTE(review): token and ids are spliced into the SQL text unescaped.
        req = 'SELECT sent_id, num FROM `annotator_token` WHERE token="' + token + '" '
        if flag:
            req += 'AND doc_id IN (' + ','.join(docs) + ')'
        hits = defaultdict(list)
        for sent_id, num in db.execute(req) or ():
            hits[sent_id].append(num)
        if not bag:
            bag = SentBag(hits, total)
        else:
            bag.update(hits, position, position)
    matches = bag.finalize(total)
    sent_list = [ShowSentence(i, matches[i], expand) for i in sorted(matches)]
    ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(sent.doc_id for sent in sent_list))
    first = per_page * (page - 1)
    sent_list = sorted(sent_list, key=lambda sent: sent.id)[first:per_page * page]
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, d_num, sent_num
Beispiel #9
0
def complex_search(age, city, f_surname):
    """Return friendship rows filtered by city, owner surname and minimum age.

    The query keeps rows whose ``f_city`` equals ``city``, whose ``friend_1``
    is the id of a ``user_info`` row with ``user_surname`` equal to
    ``f_surname``, and whose ``f_age`` is greater than ``age``.

    Returns:
        Whatever ``Database.execute`` returns for the query; each row holds
        ``f_name, f_surname, f_age, f_city``.
    """
    connection = Database()
    bound_values = (city, f_surname, age)
    return connection.execute('''SELECT f_name, f_surname, f_age, f_city 
                        FROM friendship WHERE f_city = %s
                        AND friend_1 
                        IN (SELECT id FROM user_info WHERE user_surname = %s)
                        HAVING f_age > %s''', bound_values)
Beispiel #10
0
def _any_of_clause(column, values):
    """Return an ``AND`` clause matching ``column`` against any of ``values``.

    One value gives ``AND column="v" ``; several values are OR-ed inside
    parentheses. The clause always ends with a space so that the next clause
    can be appended directly.
    """
    options = [column + '="' + value.encode('utf-8') + '"' for value in values]
    if len(options) == 1:
        return 'AND ' + options[0] + ' '
    return 'AND (' + ' OR '.join(options) + ') '


def get_subcorpus(query):
    """Select the documents matching the filters carried by ``query``.

    ``query`` is read with ``get``/``getlist``/``in``. The fields ``mode``,
    ``background`` and ``gender`` are skipped when they equal ``any``, the
    date bounds when they are empty, and the ``language[]``,
    ``generallevel[]`` and ``level[]`` lists when they are empty.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: the matching document ids
        as strings, the summed ``sentences`` and ``words`` columns of those
        documents, and ``flag`` which is true when the selection is smaller
        than the whole ``Document`` table.
    """
    # NOTE(review): request values are spliced into the SQL text unescaped;
    # use bound parameters if Database.execute supports them.
    req = 'SELECT id FROM `annotator_document` WHERE 1 '
    if u'rulec' in query:
        req += 'AND subcorpus="RULEC" '
    equality_filters = (
        (u'mode', 'mode'),
        (u'background', 'language_background'),
        (u'gender', 'gender'),
    )
    for field, column in equality_filters:
        value = query.get(field).encode('utf-8')
        if value != u'any':
            req += 'AND ' + column + '="' + value + '" '
    for field, comparison in ((u'date1', 'date1>='), (u'date2', 'date2<=')):
        bound = query.get(field)
        if bound != u'':
            req += 'AND ' + comparison + bound.encode('utf-8') + ' '
    list_filters = (
        (u'language[]', 'native'),
        (u'generallevel[]', 'general_level'),
        (u'level[]', 'level'),
    )
    for field, column in list_filters:
        chosen = query.getlist(field)
        if chosen != []:
            req += _any_of_clause(column, chosen)
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    num_docs = Document.objects.count()
    totals = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN ('
        + req + ')')[0]
    flag = num_docs != len(docs)
    return docs, totals[0], totals[1], flag
Beispiel #11
0
def get_orig_sent(doc_id, num):
    """Return the ``text`` of one row of ``annotator_originalsentence``.

    Args:
        doc_id: Value compared with the ``doc_id_id`` column.
        num: Value compared with the ``num`` column.

    Returns:
        The first column of the first matching row. Indexing the result
        raises if the query returns no rows.
    """
    connection = Database()
    template = ('SELECT text FROM `annotator_originalsentence` '
                'WHERE doc_id_id={} AND num={}')
    rows = connection.execute(template.format(doc_id, num))
    first_row = rows[0]
    return first_row[0]
Beispiel #12
0
def view_all():
    """Return the distinct 4-field records found in the user/friendship join.

    Every joined row contributes two records: columns 1-4 and columns 7-10
    (presumably the user's and the friend's name, surname, city and age --
    confirm against the table layout). Duplicates are dropped while keeping
    the order of first appearance.
    """
    unique = []
    db = Database()
    for row in db.execute('SELECT * FROM user_info JOIN friendship', 0):
        for columns in ((1, 2, 3, 4), (7, 8, 9, 10)):
            record = [row[index] for index in columns]
            if record not in unique:
                unique.append(record)
    return unique
    def __init__(self):
        """Create the single instance and start its background scheduler.

        Raises:
            Exception: if an instance is already stored in
                ``CronJobManager.__instance``.
        """
        # Identity test: None is a singleton, so "is not" is the correct check.
        if CronJobManager.__instance is not None:
            raise Exception("This class is a singleton!")

        self.update_cron_start_time()
        self.db_manager = Database.get_instance()
        self.dmm_ripper = DMMRipper.get_instance(
            CronJobManager.webdriver_config)
        jobstores = {'default': MemoryJobStore()}
        executors = {
            'default': {
                'type': 'threadpool',
                'max_workers': 20
            },
            'processpool': ProcessPoolExecutor(max_workers=5)
        }
        job_defaults = {'coalesce': False, 'max_instances': 3}
        self.scheduler = BackgroundScheduler()
        self.scheduler.configure(jobstores=jobstores,
                                 executors=executors,
                                 job_defaults=job_defaults,
                                 timezone=CronJobManager.time_zone,
                                 daemon=False)
        self.scheduler.start()
        # Registered last, so a failure above leaves no half-built singleton.
        CronJobManager.__instance = self
Beispiel #14
0
 def __init__(self, max_download_size, download_path, lang, initial_state):
     """Set up the download conversation and register its handlers.

     Args:
         max_download_size: Stored as ``self.max_download_size``.
         download_path: Resolved through ``utils.get_abs_path`` and stored.
         lang: Stored as ``self.lang``.
         initial_state: First state number of this conversation.
     """
     self.db_manager = Database.get_instance()
     self.scheduler = CronJobManager.get_instance()
     self.logger = logging.getLogger(__name__)
     self.download_path = utils.get_abs_path(download_path)
     self.max_download_size = max_download_size
     self.lang = lang
     self.initial_state = initial_state

     # NOTE(review): the whole range object (not an unpacked state number)
     # is stored and used as the key of ``self.states`` -- confirm intended.
     self.PROCESS_PASSWORD = range(
         initial_state, initial_state + BookDownloadHandler.num_states
     )

     callback_handler = CallbackQueryHandler(
         self.callback, pass_user_data=True
     )
     self.entry_points = [callback_handler]

     password_handler = RegexHandler(
         '.*', self.process_password, pass_user_data=True
     )
     self.states = {self.PROCESS_PASSWORD: [password_handler]}

     cancel_handler = RegexHandler('3248BC7547CE97B2A197B2A06CF7283D',
                                   self.cancel)
     self.fallbacks = [cancel_handler]

     ConversationHandler.__init__(
         self,
         entry_points=self.entry_points,
         states=self.states,
         fallbacks=self.fallbacks,
         per_chat=False
     )
Beispiel #15
0
def orig_exact_search(word, docs, flag, expand, page, per_page):
    """Search ``annotator_originalsentence`` texts with REGEXP per token.

    ``word`` is split on single spaces and every piece is used as a MySQL
    ``REGEXP`` pattern against the ``text`` column.

    Args:
        word: Query string.
        docs: Document ids (as strings), consulted only when ``flag`` is true.
        flag: When true, restrict matches to the documents in ``docs``.
        expand: Passed through unchanged to ``ShowSentence1``.
        page: Not used; results are not paged.
        per_page: Not used; results are not paged.

    Returns:
        A tuple ``(jq, sent_list, word, d_num, sent_num)`` where ``sent_list``
        maps each result row ``(doc_id_id, num, text)`` to its
        ``ShowSentence1`` object.
    """
    if flag and not docs:
        # An empty document filter would be rendered as the invalid SQL
        # "doc_id_id IN ()"; an empty subcorpus simply has no matches.
        return [], {}, word, 0, 0
    db = Database()
    sent_list = {}
    for pattern in word.split(' '):
        # NOTE(review): pattern and ids are spliced into the SQL text unescaped.
        req = 'SELECT doc_id_id, num, text FROM `annotator_originalsentence` WHERE text REGEXP "' + pattern + '" '
        if flag:
            req += 'AND doc_id_id IN (' + ','.join(docs) + ')'
        rows = db.execute(req)
        # Debug trace kept from the original; the context manager closes the
        # file even when a write fails.
        with open('l.txt', 'a') as log:
            log.write('\n')
            log.write(str(rows))
        # NOTE(review): the result is reset for every token, so only the
        # matches of the last token are returned -- confirm this is intended.
        sent_list = {}
        if rows:
            for row in rows:
                sent_list[row] = ShowSentence1(row[0], row[1], expand)
            ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(row[0] for row in sent_list))
    jq = [jquery.replace('***', str(row[1])) for row in sent_list]
    return jq, sent_list, word, d_num, sent_num
Beispiel #16
0
def collect_data(arr):
    """Build and run the search query described by ``arr``.

    Args:
        arr: Six items ``(word, lex, gram, err, docs, flag)``. ``word`` is
            matched against ``lem``, ``lex`` against ``lex`` with LIKE,
            ``gram`` is translated by ``parse_gram``, ``err`` is split into
            tags, and ``docs`` restricts the documents when ``flag`` is true.

    Returns:
        ``[]`` when all four search fields are empty strings, otherwise the
        rows returned by ``Database.execute``.
    """
    word, lex, gram, err, docs, flag = arr
    if all(field == "" for field in [word, lex, gram, err]):
        return []

    def split_tags():
        # Tags are separated by ",", ":,", "|", "(" or ")"; blanks are dropped.
        return [tag for tag in re.split(':?,|\\||\\(|\\)', err.lower()) if tag != '']

    only_errors = [word, lex, gram] == ["", "", ""] and err != ''
    if only_errors:
        # Error tags alone: search the annotations directly.
        req = '''SELECT DISTINCT document_id, start, end FROM annotator_annotation
                 LEFT JOIN annotator_sentence
                 ON annotator_annotation.document_id = annotator_sentence.id WHERE 1 '''
        for tag in split_tags():
            req += 'AND tag REGEXP "[[:<:]]' + tag + '[[:>:]]" '
        doc_column = 'doc_id_id'
    else:
        if err == '':
            req = '''SELECT DISTINCT sent_id, num FROM  annotator_token
        LEFT JOIN annotator_morphology
        ON annotator_token.id = annotator_morphology.token_id
        WHERE 1 '''
        else:
            # Token search restricted to tokens inside a matching annotation span.
            req = '''SELECT DISTINCT sent_id, num FROM  annotator_token
        LEFT JOIN annotator_morphology
        ON annotator_token.id = annotator_morphology.token_id
        LEFT JOIN annotator_annotation
        ON annotator_token.sent_id = annotator_annotation.document_id
        WHERE 1 '''
            for tag in split_tags():
                req += 'AND tag LIKE "%' + tag + '%" '
            req += 'AND num>= annotator_annotation.start AND num <= annotator_annotation.end '
        if word != '':
            req += 'AND lem="' + word + '" '
        if lex != '':
            req += 'AND lex LIKE "%' + lex + '%" '
        if gram != '':
            req += parse_gram(gram)
        doc_column = 'doc_id'
    if flag:
        req += 'AND ' + doc_column + ' IN (' + ','.join(docs) + ');'
    return Database().execute(req)
Beispiel #17
0
    def __init__(self):
        """Prepare per-product state, the database and the websocket app.

        Order books, inside order books and last trade ids are keyed by the
        entries of ``GDAX_PRODUCT_IDS``.
        """
        self.url = "wss://ws-feed.gdax.com"
        self.public_client = ccxt.gdax()

        self.product_ids = GDAX_PRODUCT_IDS

        # Each product gets its own fresh dictionaries (no sharing).
        self.order_books = dict((pid, {}) for pid in self.product_ids)
        self.inside_order_books = dict(
            (pid, {"bids": {}, "asks": {}}) for pid in self.product_ids
        )
        self.last_trade_ids = dict.fromkeys(self.product_ids)

        self.db = Database(DATABASE['GDAX'], migrate=True)

        self.ws = websocket.WebSocketApp(self.url,
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_open=self.on_open)
Beispiel #18
0
def exact_search(word, docs, flag, expand, page, per_page):
    """Return one page of sentences that contain an exact token.

    Only the first whitespace-separated piece of ``word`` is searched for.

    Args:
        word: Raw query string.
        docs: Document ids (as strings) used to restrict the search; only
            consulted when ``flag`` is true.
        flag: When true, restrict matches to the documents in ``docs``.
        expand: Passed through unchanged to ``ShowSentence``.
        page: Page number; page 1 starts at offset 0.
        per_page: Number of distinct sentences fetched per page.

    Returns:
        A tuple ``(jq, sent_list, word, docs_len, sent_num)``: the per-sentence
        ``jquery`` snippets, the ``ShowSentence`` objects of this page, the
        token that was searched, the count of distinct matching documents and
        the count of distinct matching sentences.
    """
    pieces = word.split()
    if not pieces or (flag and not docs):
        # A blank query has no token to look up, and an empty document
        # filter would be rendered as the invalid SQL "doc_id IN ()".
        return [], [], word, 0, 0
    word = pieces[0]
    db = Database()
    # NOTE(review): the token and the ids are spliced into the SQL text
    # unescaped; use bound parameters if Database.execute supports them.
    where = ' FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        where += 'AND doc_id IN (' + ','.join(docs) + ')'
    docs_len = int(db.execute('SELECT COUNT(DISTINCT doc_id)' + where)[0][0])
    sent_num = int(db.execute('SELECT COUNT(DISTINCT sent_id)' + where)[0][0])
    offset = (page - 1) * per_page
    page_req = 'SELECT DISTINCT sent_id' + where + ' LIMIT %d,%d;' % (offset, per_page)
    sent_ids = [str(row[0]) for row in db.execute(page_req)]
    positions = defaultdict(list)
    if sent_ids:
        # Collect every position of the token inside the sentences of this page.
        token_req = ('SELECT sent_id, num FROM `annotator_token` WHERE token="'
                     + word + '" AND sent_id IN (' + ', '.join(sent_ids) + ')')
        for sent_id, num in db.execute(token_req):
            positions[sent_id].append(num)
    sent_list = [ShowSentence(i, positions[i], expand) for i in sorted(positions)]
    ShowSentence.empty()
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
Beispiel #19
0
def search(parameter, value):
    """Return distinct user/friend records whose ``parameter`` equals ``value``.

    Args:
        parameter: One of ``'name'``, ``'surname'``, ``'city'`` or ``'age'``.
        value: Value to match; for ``'age'`` it is converted with ``int``
            when the joined records are filtered.

    Returns:
        A list of 4-item lists (columns 1-4 and 7-10 of the joined rows) that
        contain the value, without duplicates and in order of first
        appearance. An unsupported ``parameter`` yields ``[]``.
    """
    if parameter not in ('name', 'surname', 'city', 'age'):
        # Previously this fell through with ``res = 0`` and crashed with a
        # TypeError while iterating over an int.
        return []
    db = Database()
    # ``parameter`` is restricted to the whitelist above, so it is safe to
    # use it for the column names; the value itself stays a bound parameter.
    res = db.execute('''SELECT *
                        FROM user_info JOIN friendship
                        ON (user_{0} = %s OR f_{0} = %s)'''.format(parameter),
                     (value, value))
    out = []
    for el in res:
        out.append([el[1], el[2], el[3], el[4]])
        out.append([el[7], el[8], el[9], el[10]])
    # The join matches on either side, so keep only the records that
    # actually carry the value.
    if parameter == 'age':
        out = [el for el in out if int(value) in el]
    else:
        out = [el for el in out if value in el]
    unique = []
    for el in out:
        if el not in unique:
            unique.append(el)
    return unique
Beispiel #20
0
def insert_user_info(name, surname, city, user_age):
    """Insert one row into ``user_info`` and commit it.

    Args:
        name: Value for ``user_name``.
        surname: Value for ``user_surname``.
        city: Value for ``user_city``.
        user_age: Value for ``user_age``.
    """
    connection = Database()
    new_row = (name, surname, city, user_age)
    connection.execute('''
    INSERT INTO user_info 
    (user_name, user_surname, user_city, user_age) 
    VALUES (%s, %s, %s, %s)
    ''', new_row)
    connection.commit()
Beispiel #21
0
def get_subcorpus(query):
    """Return every document id together with the corpus totals.

    No filter is applied in this version, so ``query`` is not read and the
    request always equals the base query.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: all document ids as
        strings, the summed ``sentences`` and ``words`` columns, and ``flag``
        which is true only when the request differs from the base query.
    """
    # The base query does not exclude any subcorpus ("hidden" included).
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = base
    connection = Database()
    docs = [str(row[0]) for row in connection.execute(req)]
    totals = connection.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` WHERE id IN ('
        + req + ')')[0]
    flag = req != base
    return docs, totals[0], totals[1], flag
Beispiel #22
0
def download_file(request, doc_id, doc_type):
    """Build a download response for one document in the requested format.

    Args:
        request: Not used inside this function.
        doc_id: Document id; interpolated into the SQL text and the file name.
        doc_type: ``'ann'`` for the annotation table, ``u'text'`` for the
            plain sentence text, anything else for the token table.

    Returns:
        An ``HttpResponse`` with a ``Content-Disposition`` header set.
    """
    # NOTE(review): doc_id is formatted straight into every query below;
    # confirm the caller guarantees it is numeric.
    db = Database()
    if doc_type == 'ann':
        # Annotations (with the owner's username) of every sentence of the document.
        req = "SELECT `username`, `data`, `tag`, `start`, `end` FROM `annotator_annotation` LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id WHERE `document_id` in (SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%s)" %doc_id
        # Header row (Russian): annotator, error, correction, tag,
        # error start and error end (word numbers from the sentence start).
        text = u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n'
        rows = db.execute(req)
        for row in rows:
            # The `data` column holds JSON with 'quote' and 'corrs' entries.
            data = json.loads(row[1])
            text += '\t'.join([str(row[0]), data['quote'], data['corrs'], row[2], str(row[3]), str(row[4])]) + '\r\n'
        response = HttpResponse(text, content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = 'attachment; filename="annotation_text_%s.csv"' %doc_id
        return response
    elif doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%s" %doc_id
        # Sentences are unescaped via `h` (defined outside this block),
        # encoded as cp1251 and joined with single spaces.
        text = ' '.join(h.unescape(i[0]).encode('cp1251') for i in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        # Unlike the other branches, no "attachment;" disposition type is set here.
        response['Content-Disposition'] = 'filename="text_%s.txt"' %doc_id
        return response
    else:
        req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%s" %doc_id
        # Header row (Russian): sentence id in the database, word, word number
        # in the sentence, tags, correction, annotator; the last three columns
        # are written empty for every token.
        rows = u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n' + u'\r\n'.join(u'\t'.join([str(row[2]),row[0], str(row[1]), '', '', '']) for row in db.execute(req))
        response = HttpResponse(rows, content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="tokens_text_%s.txt"' %doc_id
        return response
Beispiel #23
0
    def __init__(self):
        """Prepare per-product state, the database and the websocket app.

        Order books, inside order books and last trade ids are keyed by the
        entries of ``POLO_PRODUCT_IDS``; ``product_codes`` starts empty.
        """
        self.url = "wss://api2.poloniex.com"

        self.product_ids = POLO_PRODUCT_IDS
        self.product_codes = {}

        # Each product gets its own fresh dictionaries (no sharing).
        self.order_books = dict((pid, {}) for pid in self.product_ids)
        self.inside_order_books = dict(
            (pid, {"bids": {}, "asks": {}}) for pid in self.product_ids
        )
        self.last_trade_ids = dict.fromkeys(self.product_ids)

        self.db = Database(DATABASE['POLO'], migrate=True)

        self.ws = websocket.WebSocketApp(self.url,
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_open=self.on_open)
Beispiel #24
0
def insert_friend_info(user_name, user_surname, user_city, user_age, name, surname, city, age):
    """Insert a friendship row, linked to the matching user when one exists.

    The user is looked up in ``user_info`` by name, surname, city and age.
    When a row is found its id is stored in ``friend_1``; otherwise the
    friend is inserted without a link. The transaction is committed at the
    end.

    Args:
        user_name: ``user_name`` of the owning user.
        user_surname: ``user_surname`` of the owning user.
        user_city: ``user_city`` of the owning user.
        user_age: ``user_age`` of the owning user.
        name: Friend's name.
        surname: Friend's surname.
        city: Friend's city.
        age: Friend's age.
    """
    db = Database()
    res = db.execute('''
    SELECT id FROM user_info
    WHERE user_name = %s AND user_surname = %s AND user_city = %s AND user_age = %s
    ''', (user_name, user_surname, user_city, user_age))
    print(res)
    # Only the lookup is guarded: an IndexError raised by the INSERT itself
    # must not silently trigger the fallback insert as well.
    try:
        user_id = res[0][0]
    except IndexError:
        # NOTE(review): this statement writes to ``friend_name`` while every
        # other query uses ``f_name`` -- confirm the column exists.
        db.execute('''
            INSERT INTO friendship
            (friend_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s)
            ''', (name, surname, city, age))
    else:
        db.execute('''
            INSERT INTO friendship
            (friend_1, f_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s, %s)
            ''', (user_id, name, surname, city, age))
    db.commit()
 def __init__(self, lang, language_codes, initial_state):
     """Set up the start conversation and register its handlers.

     Args:
         lang: Stored as ``self.lang``.
         language_codes: Stored as ``self.language_codes``.
         initial_state: First state number; the four states LANGUAGE, EMAIL,
             PASSWORD and STORE_PASS are numbered consecutively from it.
     """
     self.db_manager = Database.get_instance()
     self.scheduler = CronJobManager.get_instance()
     self.logger = logging.getLogger(__name__)
     self.lang = lang
     self.language_codes = language_codes
     self.initial_state = initial_state

     state_numbers = range(initial_state,
                           initial_state + StartWizard.num_states)
     self.LANGUAGE, self.EMAIL, self.PASSWORD, self.STORE_PASS = state_numbers

     self.entry_points = [CommandHandler('start', self.start)]

     # Every state accepts any text and forwards it to its own callback.
     state_callbacks = (
         (self.LANGUAGE, self.language),
         (self.EMAIL, self.email),
         (self.STORE_PASS, self.save_credentials),
         (self.PASSWORD, self.password),
     )
     states = {}
     for state, callback in state_callbacks:
         states[state] = [RegexHandler('.*', callback)]
     self.states = states

     cancel_handler = CommandHandler('10aec35353f9c4096a71c38654c3d402',
                                     self.cancel)
     self.fallbacks = [cancel_handler]

     ConversationHandler.__init__(self,
                                  entry_points=self.entry_points,
                                  states=self.states,
                                  fallbacks=self.fallbacks)
Beispiel #26
0
def _full_search_query(code, word, lex, gram, err, docs, flag):
    """Assemble the SELECT statement for one combination of search filters.

    ``code`` is the four-character string from ``bincode``; a '1' in position
    0..3 means the word (lemma), lexical, grammatical or error-tag filter is
    active, in that order.  Table choice and the order of the WHERE fragments
    are the same as in the per-combination templates this replaces.

    ``parse_lex`` / ``parse_gram`` are only called for active filters.
    """
    if len(code) == 4 and set(code) <= set('01'):
        has_word, has_lex, has_gram, has_err = [bit == '1' for bit in code]
    else:
        # Any other code is treated like '1111' (every filter active).
        has_word = has_lex = has_gram = has_err = True

    token_first = ('annotator_token LEFT JOIN annotator_morphology '
                   'ON annotator_token.id = annotator_morphology.token_id')
    morphology_first = ('annotator_morphology LEFT JOIN annotator_token '
                        'ON annotator_token.id = annotator_morphology.token_id')
    err_only = has_err and not (has_word or has_lex or has_gram)

    if err_only:
        # Error tag alone: query the annotations directly.
        columns, doc_column = 'document_id, start, end', 'doc_id_id'
        tables = ('annotator_annotation LEFT JOIN annotator_sentence '
                  'ON annotator_annotation.document_id = annotator_sentence.id')
    else:
        columns, doc_column = 'sent_id, num', 'doc_id'
        if has_err:
            tables = (token_first + ' LEFT JOIN annotator_annotation '
                      'ON annotator_token.sent_id = annotator_annotation.document_id')
        elif has_word and has_lex and has_gram:
            tables = token_first
        else:
            tables = morphology_first

    conditions = []
    if has_word:
        # NOTE(review): `word` (like `docs` below) is interpolated into the
        # SQL text unescaped; switch to bound parameters if Database.execute
        # supports them.
        conditions.append('AND lem="%s"' % word)
    if has_err:
        tag_filter = parse_gram(err, 'tag')
        inside_annotation = ('AND num>= annotator_annotation.start '
                             'AND num <= annotator_annotation.end')
        if err_only:
            conditions.append(tag_filter)
        elif has_word:
            conditions.extend([inside_annotation, tag_filter])
        else:
            conditions.extend([tag_filter, inside_annotation])
    if has_lex:
        conditions.append(parse_lex(lex))
    if has_gram:
        conditions.append(parse_gram(gram, 'gram'))
    if flag:
        # Joined with a separating space so the filter can never be glued to
        # the preceding fragment.
        conditions.append('AND %s IN (%s)' % (doc_column, ','.join(docs)))
    return 'SELECT DISTINCT %s FROM %s WHERE 1 %s' % (
        columns, tables, ' '.join(conditions))


def collect_full_data(arr):
    """Run an unpaginated search and return every matching row.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page, per_page)``;
    ``page`` and ``per_page`` are accepted but not used here.  When ``flag``
    is set the search is restricted to the document ids in ``docs``.

    Returns ``(rows, 0, 0)``; the two counters are always zero.  An empty
    filter set, or ``flag`` with no documents, yields ``([], 0, 0)`` without
    querying.
    """
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0
    rows = db.execute(_full_search_query(s, word, lex, gram, err, docs, flag))
    return rows, 0, 0
Beispiel #27
0
def view_registered():
    """Query every row of the ``user_info`` table.

    Returns whatever ``Database.execute`` yields for the statement.
    """
    return Database().execute('''SELECT * FROM user_info''', 0)
Beispiel #28
0
import sys
import csv
import sqlite3

from db_utils import Database
from config import DATABASE

db = Database(DATABASE['GDAX'], row_factory=sqlite3.Row)

# Export the whole gdax_order_book table to order_books.csv.
data = db._execute("SELECT * FROM gdax_order_book", {}, fetch=True)

# The header is taken from the first row, so this assumes the query returned
# at least one row.
titles = data[0].keys()

# The csv module expects a binary file on Python 2 and a text file opened
# with newline='' on Python 3; without it, rows are separated by blank lines
# on platforms that translate '\n' to '\r\n'.
if sys.version_info < (3, ):
    mode, open_kwargs = "wb", {}
else:
    mode, open_kwargs = "w", {"newline": ""}

with open('order_books.csv', mode, **open_kwargs) as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(titles)  # header row: the column names
    writer.writerows(data)
    def download_book_pages_job(book_path, missing_images, start_toc_missing,
                                book):
        """Download the missing pages (and TOC, if requested) of ``book``.

        ``missing_images`` lists the page numbers still to fetch into
        ``book_path``; ``start_toc_missing`` tells whether ``toc.txt`` must be
        downloaded as well.  Subscribers registered under
        ``CronJobManager.book_job[book.id]['download']`` receive progress
        updates, then either a "finished" message followed by a conversion
        subscription, or an error message.  The book is flagged as
        "now downloading" for the duration of the job and the subscriber
        list is cleared at the end.
        """
        db_manager = Database.get_instance()
        db_session = db_manager.create_session()
        CronJobManager.logger.info('Starting download job of book %s', book.id)
        db_manager.set_volume_now_downloading(db_session, book.id, True)
        num_missing_images = len(missing_images)
        CronJobManager.notify_subscribers_download_progress(
            book,
            book.pages - num_missing_images,
            start_toc_missing,
            start_toc_missing=start_toc_missing)

        dmm_cookies = CronJobManager.get_dmm_cookies_for_book_download(book)
        download_failed = False

        if dmm_cookies:
            toc_path = path.join(book_path, 'toc.txt')
            if start_toc_missing:
                try:
                    CronJobManager.__instance.dmm_ripper.download_book_toc( \
                        book, toc_path)
                    is_toc_missing = False
                    CronJobManager.notify_subscribers_download_progress(
                        book,
                        book.pages - num_missing_images,
                        is_toc_missing,
                        edit_message=True)
                except Exception as e:
                    CronJobManager.logger.exception(e)
                    CronJobManager.__instance.dmm_ripper.close_broser_reader()
                    is_toc_missing = True
            else:
                is_toc_missing = False
            for index, page_num in enumerate(missing_images):
                try:
                    CronJobManager.__instance.dmm_ripper.download_book_page( \
                        book, page_num, \
                        path.join(book_path, '{}'.format(page_num))
                    )
                except Exception as e:
                    # Log the failure (as the TOC download does) instead of
                    # swallowing it silently; remaining pages are still tried.
                    CronJobManager.logger.exception(e)
                    CronJobManager.__instance.dmm_ripper.close_broser_reader()
                    download_failed = True
                CronJobManager.notify_subscribers_download_progress(
                    book,
                    book.pages - num_missing_images + index + 1,
                    is_toc_missing,
                    start_toc_missing=start_toc_missing,
                    edit_message=True)
            CronJobManager.__instance.dmm_ripper.close_broser_reader()
            CronJobManager.logger.info('Download of book %s has finished',
                                       book.id)
            # NOTE(review): the "finished" message and the conversion
            # subscription below are also issued when download_failed is set;
            # confirm whether they should be skipped in that case.
            for subscriber in CronJobManager.book_job[book.id]['download']:
                user = subscriber['user']
                subscriber['bot'].send_message(
                    chat_id=user.id,
                    text=CronJobManager.lang[user.language_code] \
                        ['download_finished'].format(
                            FileFormat(user.file_format).name.upper()
                        )
                )
                CronJobManager.__instance.subscribe_to_book_conversion( \
                    book, book_path, subscriber['user'], subscriber['bot'], \
                    from_download=True
                )
        if not dmm_cookies or download_failed:
            CronJobManager.logger.info('Unable to start the download of ' \
                + 'book %s', book.id)
            # Subscribers live in the 'download' list of the book's job entry;
            # iterating the entry itself would only yield its keys.
            for subscriber in CronJobManager.book_job[book.id]['download']:
                user = subscriber['user']
                CronJobManager.logger.info('Sending download error message ' \
                    + 'to subscriber %s', user.id)
                subscriber['bot'].send_message(
                    chat_id = user.id,
                    text = CronJobManager.lang[user.language_code] \
                        ['download_error']
                )
        db_manager.set_volume_now_downloading(db_session, book.id, False)
        CronJobManager.logger.info('Removing the registration of download ' \
            + 'job for book %s', book.id)
        CronJobManager.book_job[book.id]['download'] = []
Beispiel #30
0
def _paged_search_parts(code, word, lex, gram, err, docs, flag):
    """Build the shared pieces of the paginated search statements.

    ``code`` is the four-character string from ``bincode``; a '1' in position
    0..3 means the word (lemma), lexical, grammatical or error-tag filter is
    active, in that order.  Table choice and the order of the WHERE fragments
    are the same as in the per-combination templates this replaces.

    Returns ``(id_column, columns, doc_column, from_where)``: the column that
    identifies a hit, the column list of the row query, the column holding
    the document id, and the common `` FROM ... WHERE ...`` tail.
    ``parse_lex`` / ``parse_gram`` are only called for active filters.
    """
    if len(code) == 4 and set(code) <= set('01'):
        has_word, has_lex, has_gram, has_err = [bit == '1' for bit in code]
    else:
        # Any other code is treated like '1111' (every filter active).
        has_word = has_lex = has_gram = has_err = True

    token_first = ('annotator_token LEFT JOIN annotator_morphology '
                   'ON annotator_token.id = annotator_morphology.token_id')
    morphology_first = ('annotator_morphology LEFT JOIN annotator_token '
                        'ON annotator_token.id = annotator_morphology.token_id')
    err_only = has_err and not (has_word or has_lex or has_gram)

    if err_only:
        # Error tag alone: query the annotations directly.
        id_column, columns = 'document_id', 'document_id, start, end'
        doc_column = 'doc_id_id'
        tables = ('annotator_annotation LEFT JOIN annotator_sentence '
                  'ON annotator_annotation.document_id = annotator_sentence.id')
    else:
        id_column, columns = 'sent_id', 'sent_id, num'
        doc_column = 'doc_id'
        if has_err:
            tables = (token_first + ' LEFT JOIN annotator_annotation '
                      'ON annotator_token.sent_id = annotator_annotation.document_id')
        elif has_word and has_lex and has_gram:
            tables = token_first
        else:
            tables = morphology_first

    conditions = []
    if has_word:
        # NOTE(review): `word` (like `docs` below) is interpolated into the
        # SQL text unescaped; switch to bound parameters if Database.execute
        # supports them.
        conditions.append('AND lem="%s"' % word)
    if has_err:
        tag_filter = parse_gram(err, 'tag')
        inside_annotation = ('AND num>= annotator_annotation.start '
                             'AND num <= annotator_annotation.end')
        if err_only:
            conditions.append(tag_filter)
        elif has_word:
            conditions.extend([inside_annotation, tag_filter])
        else:
            conditions.extend([tag_filter, inside_annotation])
    if has_lex:
        conditions.append(parse_lex(lex))
    if has_gram:
        conditions.append(parse_gram(gram, 'gram'))
    if flag:
        # Joined with a separating space so the filter can never be glued to
        # the preceding fragment.
        conditions.append('AND %s IN (%s)' % (doc_column, ','.join(docs)))
    from_where = ' FROM %s WHERE 1 %s' % (tables, ' '.join(conditions))
    return id_column, columns, doc_column, from_where


def collect_data(arr):
    """Run one page of a search and return its rows plus overall counts.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page, per_page)``.
    When ``flag`` is set the search is restricted to the document ids in
    ``docs``.  The hit ids of the requested page are selected first, then the
    row query is limited to those ids.

    Returns ``(rows, sent_num, d_num)``: the rows of the page, the number of
    distinct hits and the number of distinct documents over the whole result.
    An empty filter set, ``flag`` with no documents, or an empty page all
    yield ``([], 0, 0)``.
    """
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    # Always return a 3-tuple, and never emit an empty "IN ()" list.
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0

    id_column, columns, doc_column, from_where = _paged_search_parts(
        s, word, lex, gram, err, docs, flag)
    n_req = 'SELECT COUNT(DISTINCT ' + id_column + ')' + from_where
    d_req = 'SELECT COUNT(DISTINCT ' + doc_column + ')' + from_where
    req = 'SELECT DISTINCT ' + columns + from_where
    req1 = ('SELECT DISTINCT ' + id_column + from_where
            + ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))

    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req1)]) + ')'
    if sentences == '()':
        return [], 0, 0
    req += ' AND ' + id_column + ' IN ' + sentences

    # NOTE(review): debugging dump to a hard-coded path; it raises when the
    # directory does not exist.  `with` guarantees the handle is closed even
    # if the write fails.
    with codecs.open('/home/elmira/learner_corpus/tempfiles/s.txt', 'w') as f:
        f.write(req + '\r\n' + n_req + '\r\n' + d_req)

    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num
Beispiel #31
0
    parser.add_argument("--epochs",
                        type=int,
                        default=10,
                        help="Number of epochs during training")
    parser.add_argument("--classifier",
                        type=str,
                        default='SVM',
                        choices={'NN', 'SVM'},
                        help="Downstream Classifier")
    parser.add_argument("--seed", type=int, default=0, help="Random Seed")
    args = parser.parse_args()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data_path = f'Datasets/{args.data_name}'
    db = Database.load_csv(data_path)

    model_dir = f'models/{args.data_name}/{args.kernel}_{args.depth}_{args.dim}_{args.num_samples}_{args.epochs}_{args.batch_size}_{args.seed}'
    os.makedirs(model_dir, exist_ok=True)

    sample_fct = ek_utlis.ek_sample_fct if args.kernel == 'EK' else mmd_utils.mmd_sample_fct

    Y, rows = db.get_labels()

    scores = []
    split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
    for i, (train_index, test_index) in enumerate(split.split(rows, Y)):

        samples = get_samples(db, args.depth, args.num_samples, sample_fct)
        row_idx = {r: i for i, r in enumerate(rows)}
        scheme_idx = {s: i for i, s in enumerate(samples.keys())}
    def cache_user_library(user, session=None, password=None, fast=False):
        """Synchronise the stored library of ``user`` with their DMM purchases.

        ``session`` is an existing DMM session; when it is ``None`` a new one
        is obtained with ``user.email`` and either the stored password (if
        ``user.save_credentials`` is set) or ``password``.  ``fast`` is passed
        through to ``get_session``.

        Every purchased series, volume and stand-alone book missing from the
        database is added, and every volume the user does not own yet is
        appended to ``user.book_collection``.  On success the cache expiry
        date, cache-built flag and login-error flag are updated; on any
        exception the login-error flag is set and the scheduled cache job of
        the user is removed.  The "now caching" flag is cleared in all cases.
        """
        db_manager = Database.get_instance()
        db_session = db_manager.create_session()
        db_manager.set_user_now_caching(db_session, user.id, True)
        CronJobManager.logger.info('Caching %s user\'s library', user.id)

        try:
            if session is None:
                if user.save_credentials:
                    password = user.password
                session = CronJobManager.__instance.dmm_ripper.get_session(
                    user.email, password, fast)
                CronJobManager.logger.info('Obtaining a new DMM session for ' \
                    + 'user %s', user.id)
            books = CronJobManager.__instance.dmm_ripper.get_purchased_books(
                session)
            db_session.add(user)
            for book in books:
                if book['series']:
                    serie = db_manager.get_manga_serie(db_session, book['url'])
                    if not serie:
                        serie = MangaSeries(title=book['name'],
                                            url=book['url'],
                                            thumbnail_dmm=book['thumbnail'])
                        db_session.add(serie)
                        CronJobManager.logger.info(
                            'Adding a new serie to DB: ' + '%s', serie.title)
                        CronJobManager.thumbnail(db_session, serie, db_manager)
                    CronJobManager.logger.info('Processing volumes of ' \
                        + 'series %s', serie.title)
                    volumes = CronJobManager.__instance.dmm_ripper \
                    .get_book_volumes(
                        session, book
                    )
                    for volume in volumes:
                        db_volume = db_manager.get_manga_volume(
                            db_session, volume['url'])
                        if not db_volume:
                            volume_details = CronJobManager.__instance \
                            .dmm_ripper.get_book_details(
                                session, volume['details_url']
                            )
                            db_volume = Manga(
                                title=volume['name'],
                                url=volume['url'],
                                thumbnail_dmm=volume['thumbnail'],
                                description=volume_details['description'],
                                pages=volume_details['pages'],
                                serie=serie)
                            db_session.add(db_volume)
                            CronJobManager.logger.info('Adding a new volume ' \
                                + ' to DB: %s', db_volume.title)
                            CronJobManager.thumbnail(db_session,
                                                     db_volume,
                                                     db_manager,
                                                     parent=serie)
                        if not db_manager.user_owns_volume(
                                db_session, user.id, db_volume.url):
                            CronJobManager.logger.info('Adding volume to ' \
                                + 'user %s', user.id)
                            try:
                                user.book_collection.append(db_volume)
                                db_manager.commit(db_session)
                            except Exception:
                                CronJobManager.logger.exception('Error ' \
                                    + 'adding volume to user %s', user.id)
                                db_manager.rollback(db_session)
                else:
                    # Keep the scraped `book` dict intact: its fields are
                    # still needed when the DB lookup finds nothing.
                    db_book = db_manager.get_manga_volume(db_session,
                                                          book['url'])
                    if not db_book:
                        book_details = CronJobManager.__instance.dmm_ripper \
                        .get_book_details(
                            session, book['details_url']
                        )
                        # A stand-alone book belongs to no series.
                        # NOTE(review): assumes Manga accepts serie=None.
                        db_book = Manga(title=book['name'],
                                        url=book['url'],
                                        thumbnail_dmm=book['thumbnail'],
                                        description=book_details['description'],
                                        pages=book_details['pages'],
                                        serie=None)
                        db_session.add(db_book)
                        CronJobManager.logger.info('Adding a new non series ' \
                            + 'book to DB: %s', db_book.title)
                        CronJobManager.thumbnail(db_session, db_book,
                                                 db_manager)
                    if not db_manager.user_owns_volume(db_session, user.id,
                                                       db_book.url):
                        CronJobManager.logger.info('Adding non series book ' \
                            + 'to user %s', user.id)
                        try:
                            user.book_collection.append(db_book)
                            # Commit/rollback act on the database session,
                            # not on the DMM web session.
                            db_manager.commit(db_session)
                        except Exception:
                            CronJobManager.logger.exception('Error adding ' \
                                + 'non series book to user %s', user.id)
                            db_manager.rollback(db_session)

            db_manager.set_user_cache_expire_date(
                db_session, user.id, CronJobManager.get_cache_expire_date())
            db_manager.set_user_cache_built(db_session, user.id, True)
            db_manager.set_user_login_error(db_session, user.id, False)
        except Exception as e:
            CronJobManager.logger.info('Unable to login to the DMM account ' \
                + 'of user %s', user.id)
            db_manager.set_user_login_error(db_session, user.id, True)
            CronJobManager.remove_scheduled_user_cache(user.id)
            CronJobManager.logger.exception(e)
        finally:
            CronJobManager.logger.info('%s user\'s library caching ended',
                                       user.id)
            db_manager.set_user_now_caching(db_session, user.id, False)
            db_manager.remove_session()
Beispiel #33
0
def _collect_data_template(code, word, lex, gram, err):
    """Build the shared `` FROM ... WHERE ...`` tail for a token-level search.

    ``code`` is the four-character flag string produced by ``bincode``; its
    characters say, in order, whether the lemma (``word``), lexical (``lex``),
    grammatical (``gram``) and error-tag (``err``) criteria are active.  The
    error-only code ``'0001'`` uses different tables and is handled by the
    caller, not here.  Any code outside the sixteen known values is treated
    like ``'1111'`` (all criteria active).

    The returned string ends with a space so that a further ``AND ...``
    fragment can be appended directly.
    """
    known_codes = {format(value, '04b') for value in range(16)}
    if code not in known_codes:
        code = '1111'
    has_word, has_lex, has_gram, has_err = (bit == '1' for bit in code)

    token_first = (' FROM annotator_token'
                   ' LEFT JOIN annotator_morphology'
                   ' ON annotator_token.id = annotator_morphology.token_id')
    if has_err:
        # Error-tag criteria need the annotation span of the sentence.
        joins = (token_first +
                 ' LEFT JOIN annotator_annotation'
                 ' ON annotator_token.sent_id = annotator_annotation.document_id')
    elif code == '1110':
        joins = token_first
    else:
        joins = (' FROM annotator_morphology'
                 ' LEFT JOIN annotator_token'
                 ' ON annotator_token.id = annotator_morphology.token_id')

    conditions = []
    if has_word:
        # NOTE(review): ``word`` is interpolated into the SQL text unescaped;
        # switch to a parameterised query if Database.execute supports one.
        conditions.append('AND lem="%s"' % word)
    if has_err:
        in_span = ('AND num>= annotator_annotation.start '
                   'AND num <= annotator_annotation.end')
        tag_filter = parse_gram(err, 'tag')
        # Clause order is kept as it was: with a lemma the span test comes
        # before the tag filter, without one it comes after.
        conditions.extend(
            [in_span, tag_filter] if has_word else [tag_filter, in_span])
    if has_lex:
        conditions.append(parse_lex(lex))
    if has_gram:
        conditions.append(parse_gram(gram, 'gram'))

    return joins + ' WHERE 1 ' + ' '.join(conditions) + ' '


def collect_data(arr):
    """Run one page of a corpus search and return the matching rows and totals.

    ``arr`` is a 9-item sequence:
    ``(word, lex, gram, err, comment, docs, flag, page, per_page)``.
    ``comment`` is accepted but not used.  When ``flag`` is truthy the search
    is restricted to the document ids in ``docs`` (a sequence of strings, as
    they are joined with ``','``).

    Returns ``(rows, sent_num, d_num)``: the rows found for the sentences on
    the requested page, the total number of distinct matching sentences and
    the total number of distinct matching documents.  ``([], 0, 0)`` is
    returned when no criterion is set, when a document restriction is
    requested with no documents, or when the requested page is empty.
    """
    word, lex, gram, err, _comment, docs, flag, page, per_page = arr
    err = err.strip()
    code = bincode(word, lex, gram, err)
    if code == '0000' or (flag and not docs):
        return [], 0, 0

    if code == '0001':
        # Error-tag-only search works on annotations joined to sentences and
        # uses that table pair's own column names.
        template = (' FROM annotator_annotation'
                    ' LEFT JOIN annotator_sentence'
                    ' ON annotator_annotation.document_id = annotator_sentence.id'
                    ' WHERE 1 ' + parse_gram(err, 'tag') + ' ')
        sent_col, doc_col = 'document_id', 'doc_id_id'
        row_cols = 'document_id, start, end'
    else:
        template = _collect_data_template(code, word, lex, gram, err)
        sent_col, doc_col = 'sent_id', 'doc_id'
        row_cols = 'sent_id, num'
    if flag:
        template += 'AND %s IN (%s)' % (doc_col, ','.join(docs))

    db = Database()

    # Step 1: ids of the sentences that fall on the requested page.
    page_req = 'SELECT DISTINCT %s%s LIMIT %d,%d;' % (
        sent_col, template, (page - 1) * per_page, per_page)
    sentences = '(' + ', '.join(str(row[0]) for row in db.execute(page_req)) + ')'
    if sentences == '()':
        return [], 0, 0

    # Step 2: the hits inside those sentences, then the overall totals.
    rows = db.execute('SELECT DISTINCT %s%s AND %s IN %s' % (
        row_cols, template, sent_col, sentences))
    sent_num = int(db.execute(
        'SELECT COUNT(DISTINCT %s)%s' % (sent_col, template))[0][0])
    d_num = int(db.execute(
        'SELECT COUNT(DISTINCT %s)%s' % (doc_col, template))[0][0])
    return rows, sent_num, d_num
class DataFeed():
    """Record GDAX level-2 order book changes and trades from the websocket feed.

    One in-memory book is kept per product: ``{'bids': [...], 'asks': [...]}``
    where each level is a ``[price, size]`` pair as delivered by the feed.
    ``snapshot`` messages replace a book, ``l2update`` messages patch it, and
    whenever the first ``INSIDE_DEPTH`` levels of a book change a row is
    written to ``gdax_order_book``.  Every ``match`` message is written to
    ``gdax_trades``; when a gap in trade ids is noticed the missing trades
    are looked up through the REST client and stored as back-filled rows.
    """

    # Number of price levels per side written to ``gdax_order_book``.
    INSIDE_DEPTH = 15

    def __init__(self):
        self.url = "wss://ws-feed.gdax.com"
        self.public_client = gdax.PublicClient()

        self.product_ids = GDAX_PRODUCT_IDS

        self.order_books = {x: {} for x in self.product_ids}
        # Last inside book written per product, used to skip unchanged rows.
        self.inside_order_books = {
            x: {
                "bids": {},
                "asks": {}
            }
            for x in self.product_ids
        }
        self.last_trade_ids = {x: None for x in self.product_ids}

        self.db = Database(DATABASE['GDAX'], migrate=True)

        self.ws = websocket.WebSocketApp(
            self.url,
            on_message=self.on_message,
            on_error=self.on_error,
            on_open=self.on_open,
        )

    @staticmethod
    def _timestamp():
        """Return the current local time formatted for the database rows."""
        return datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f%Z")

    @staticmethod
    def _apply_change(levels, side, price, size):
        """Apply one ``(price, size)`` change to the ``levels`` list in place.

        An existing level is updated, or removed when ``size`` is zero.  A
        new level is inserted after the last level that is at least as good
        (higher-or-equal price for bids, lower-or-equal for asks).  A
        zero-size change for a price that is not in the book is ignored
        instead of being stored as an empty level.
        """
        target = float(price)
        is_removal = float(size) == 0
        existing = next(
            (i for i, level in enumerate(levels) if float(level[0]) == target),
            None)
        if existing is not None:
            if is_removal:
                levels.pop(existing)
            else:
                levels[existing][1] = size
            return
        if is_removal:
            return
        if side == 'bids':
            ahead = [i for i, level in enumerate(levels)
                     if float(level[0]) >= target]
        else:
            ahead = [i for i, level in enumerate(levels)
                     if float(level[0]) <= target]
        levels.insert(max(ahead) + 1 if ahead else 0, [price, size])

    @staticmethod
    def _inside_levels(levels, prefix, depth):
        """Map ``<prefix>_<rank>`` to ``"size@price"`` for the first ``depth`` levels.

        Books shallower than ``depth`` simply yield fewer entries.
        """
        return {
            prefix + '_' + str(rank + 1): "@".join(level[::-1])
            for rank, level in enumerate(levels[:depth])
        }

    def on_message(self, ws, msg):
        """Dispatch one raw JSON feed message to the matching handler."""
        msg = json.loads(msg)

        # Control messages (e.g. the subscription acknowledgement) carry no
        # product id and nothing to record.
        product_id = msg.get('product_id')
        if product_id is None:
            return

        msg_type = msg.get('type')
        if msg_type == 'snapshot':
            self.order_books[product_id] = {
                'bids': msg['bids'],
                'asks': msg['asks']
            }
        elif msg_type == 'l2update':
            self._handle_l2update(product_id, msg['changes'])
        elif msg_type == 'match':
            self._handle_match(product_id, msg)

    def _handle_l2update(self, product_id, changes):
        """Patch the book with ``changes`` and store the inside book if it moved."""
        book = self.order_books.get(product_id)
        if not book:
            # No snapshot received yet, so there is no book to patch.
            return

        for change in changes:
            change_side = 'bids' if change[0] == 'buy' else 'asks'
            self._apply_change(book[change_side], change_side,
                               change[1], change[2])

        inside_bids = self._inside_levels(book['bids'], 'bids',
                                          self.INSIDE_DEPTH)
        inside_asks = self._inside_levels(book['asks'], 'asks',
                                          self.INSIDE_DEPTH)
        inside_order_book = {"bids": inside_bids, "asks": inside_asks}

        if self.inside_order_books.get(product_id) != inside_order_book:
            row = {
                "server_datetime": self._timestamp(),
                "product_id": product_id
            }
            row.update(inside_bids)
            row.update(inside_asks)

            self.db.insert_into("gdax_order_book", data=row)

            self.inside_order_books[product_id] = inside_order_book
            print(row)

    def _handle_match(self, product_id, msg):
        """Store a live trade plus any trades skipped since the previous one."""
        trades = [{
            "server_datetime": self._timestamp(),
            "exchange_datetime": msg['time'],
            "sequence": msg['sequence'],
            "trade_id": msg['trade_id'],
            "product_id": product_id,
            'price': msg['price'],
            'volume': msg['size'],
            'side': msg['side'],
            'backfilled': 'False'
        }]

        current_trade_id = int(msg["trade_id"])
        previous = self.last_trade_ids.get(product_id)
        last_trade_id = int(previous) if previous else current_trade_id
        self.last_trade_ids[product_id] = msg["trade_id"]

        if current_trade_id > (last_trade_id + 1):
            missing_trade_ids = list(range(last_trade_id + 1, current_trade_id))
            print("missed the following trades: " + str(missing_trade_ids))
            # Index the REST result once; it is consumed a single time, so
            # this also works if the client hands back an iterator.
            recent_trades = {}
            for product_trade in self.public_client.get_product_trades(
                    product_id=product_id):
                recent_trades.setdefault(int(product_trade['trade_id']),
                                         product_trade)
            for missing_trade_id in missing_trade_ids:
                missing_product_trade = recent_trades.get(missing_trade_id)
                if missing_product_trade is None:
                    # Not in the data returned by the REST call: report it
                    # and carry on instead of failing the whole message.
                    print("could not backfill trade: " + str(missing_trade_id))
                    continue
                trades.append({
                    "server_datetime": self._timestamp(),
                    "exchange_datetime": missing_product_trade['time'],
                    "sequence": "None",
                    "trade_id": missing_product_trade['trade_id'],
                    "product_id": product_id,
                    'price': missing_product_trade['price'],
                    'volume': missing_product_trade['size'],
                    'side': missing_product_trade['side'],
                    'backfilled': 'True'
                })

        for trade in trades:
            self.db.insert_into("gdax_trades", trade)
            print(trade)

    def on_error(self, ws, error):
        """Print an error reported for the websocket."""
        print(error)

    def on_open(self, ws):
        """Subscribe to the level2 and matches channels for all products."""
        request = {
            "type": "subscribe",
            "product_ids": self.product_ids,
            "channels": ["level2", "matches"]
        }
        request = json.dumps(request)
        request = request.encode("utf-8")
        ws.send(request)

    def run(self):
        """Run the websocket loop until it stops or the user interrupts it."""
        try:
            self.ws.run_forever()
        except KeyboardInterrupt:
            sys.exit()
        except Exception as exc:
            # Still best-effort, but report the failure instead of hiding it.
            self.on_error(self.ws, exc)

def compute_embedding(db):
    """Return ``(embedding, model)`` from a 5-epoch node2vec run on ``db``'s graph.

    The graph comes from ``db.get_row_val_graph_reg()``.
    """
    graph = db.get_row_val_graph_reg()
    result = get_node2vec_embedding_new(graph, epochs=5)
    embedding, model = result
    return embedding, model


if __name__ == "__main__":
    # Dataset to load and the file name prefix of its cached embeddings.
    name = 'mutagenesis'
    embedding_name = 'testn3.pckl'
    path = f'Datasets/{name}'
    embedding_path = f'Embeddings/{name}/{embedding_name}'

    db = Database.load_csv(path)
    Y, rows = db.get_labels()

    scores = []
    # Ten stratified 90% train splits with a fixed seed.
    split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
    for i, (train_index, test_index) in enumerate(split.split(rows, Y)):
        # One embedding file per split index; computed only if not loadable.
        embedding, _ = io_utils.load_or_compute(
            f'{embedding_path}_{i}', lambda: compute_embedding(db))

        train_rows = [rows[idx] for idx in train_index]
        test_rows = [rows[idx] for idx in test_index]
        X_train = np.float32([embedding[row] for row in train_rows])
        X_test = np.float32([embedding[row] for row in test_rows])
        Y_train = [Y[idx] for idx in train_index]
        Y_test = [Y[idx] for idx in test_index]

        clf = SVC(kernel='rbf', C=1.0)
Beispiel #36
0
def make_tables():
    """Drop and recreate the ``word_info``, ``examples`` and ``updaters`` tables.

    Each table is dropped if it exists and then created again, in that
    order, and the changes are committed once at the end.
    """
    word_info_ddl = (
        "CREATE TABLE word_info\n"
        "                    (word_id INTEGER PRIMARY KEY AUTOINCREMENT,\n"
        "                    word TEXT, \n"
        "                    definition TEXT,\n"
        "                    updater TEXT);\n"
        "                           "
    )
    examples_ddl = (
        "CREATE TABLE examples\n"
        "                    (word_id INTEGER, \n"
        "                    example TEXT);\n"
        "                            "
    )
    updaters_ddl = (
        "CREATE TABLE updaters\n"
        "                    (author_id INTEGER PRIMARY KEY AUTOINCREMENT,\n"
        "                    city TEXT,\n"
        "                    date DATE);\n"
        "                            "
    )
    schema = (
        ("word_info", word_info_ddl),
        ("examples", examples_ddl),
        ("updaters", updaters_ddl),
    )

    db = Database()
    for table_name, create_statement in schema:
        db.execute("DROP TABLE IF EXISTS %s;" % table_name, 0)
        db.execute(create_statement, 0)
    db.commit()
Beispiel #37
0
class DataFeed():
    """Record Poloniex order book changes and trades from the websocket feed.

    Channel messages are lists of the form ``[channel_id, sequence, updates]``.
    An ``'i'`` update carries a full book for a currency pair and also tells
    which pair the numeric channel id stands for; ``'o'`` updates patch one
    price level; ``'t'`` updates are trades.  Whenever the first
    ``INSIDE_DEPTH`` levels of a book change a row is written to
    ``polo_order_book``; every trade is written to ``polo_trades``.
    """

    # Number of price levels per side written to ``polo_order_book``.
    INSIDE_DEPTH = 15

    def __init__(self):
        self.url = "wss://api2.poloniex.com"

        self.product_ids = POLO_PRODUCT_IDS
        # channel id -> currency pair, learned from snapshot messages.
        self.product_codes = {}
        self.order_books = {x: {} for x in self.product_ids}
        # Last inside book written per pair, used to skip unchanged rows.
        self.inside_order_books = {
            x: {
                "bids": {},
                "asks": {}
            }
            for x in self.product_ids
        }
        self.last_trade_ids = {x: None for x in self.product_ids}

        self.db = Database(DATABASE['POLO'], migrate=True)

        self.ws = websocket.WebSocketApp(
            self.url,
            on_message=self.on_message,
            on_error=self.on_error,
            on_open=self.on_open,
        )

    @staticmethod
    def _timestamp():
        """Return the current local time formatted for the database rows."""
        return datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f%Z")

    @staticmethod
    def _apply_change(levels, side, price, size):
        """Apply one ``(price, size)`` change to the ``levels`` list in place.

        An existing level is updated, or removed when ``size`` is zero.  A
        new level is inserted after the last level that is at least as good
        (higher-or-equal price for bids, lower-or-equal for asks).  A
        zero-size change for a price that is not in the book is ignored
        instead of being stored as an empty level.
        """
        target = float(price)
        is_removal = float(size) == 0
        existing = next(
            (i for i, level in enumerate(levels) if float(level[0]) == target),
            None)
        if existing is not None:
            if is_removal:
                levels.pop(existing)
            else:
                levels[existing][1] = size
            return
        if is_removal:
            return
        if side == 'bids':
            ahead = [i for i, level in enumerate(levels)
                     if float(level[0]) >= target]
        else:
            ahead = [i for i, level in enumerate(levels)
                     if float(level[0]) <= target]
        levels.insert(max(ahead) + 1 if ahead else 0, [price, size])

    @staticmethod
    def _inside_levels(levels, prefix, depth):
        """Map ``<prefix>_<rank>`` to ``"size@price"`` for the first ``depth`` levels.

        Books shallower than ``depth`` simply yield fewer entries.
        """
        return {
            prefix + '_' + str(rank + 1): "@".join(level[::-1])
            for rank, level in enumerate(levels[:depth])
        }

    def on_message(self, ws, msg):
        """Decode one raw feed message and process each update it contains."""
        msg = json.loads(msg)

        if isinstance(msg, dict):
            # Not a channel message (lists are); surface it instead of
            # failing on the positional indexing below.
            self.on_error(ws, msg)
            return
        if not isinstance(msg, list) or len(msg) < 3:
            # Frames without an update list (e.g. one-element heartbeats)
            # have nothing to record.
            return

        channel_id, sequence = msg[0], msg[1]
        for message in msg[2]:
            if message[0] == 'i':
                self._handle_snapshot(channel_id, message[1])
            elif message[0] == 'o':
                self._handle_order_update(channel_id, message)
            elif message[0] == 't':
                self._handle_trade(channel_id, sequence, message)

    def _handle_snapshot(self, channel_id, payload):
        """Replace a pair's book with the snapshot and remember its channel id."""
        print("got ob snapshot")
        all_bids = payload['orderBook'][1]
        all_asks = payload['orderBook'][0]
        final_all_bids = [[x, all_bids[x]]
                          for x in sorted(all_bids, key=sorting_key,
                                          reverse=True)]
        final_all_asks = [[x, all_asks[x]]
                          for x in sorted(all_asks, key=sorting_key)]
        self.order_books[payload['currencyPair']] = {
            'bids': final_all_bids,
            'asks': final_all_asks
        }
        self.product_codes[channel_id] = payload['currencyPair']

    def _handle_order_update(self, channel_id, message):
        """Apply ``['o', side, price, size]`` and store the inside book if it moved."""
        pair = self.product_codes.get(channel_id)
        book = self.order_books.get(pair)
        if not book:
            # No snapshot seen for this channel yet, so no book to patch.
            return

        change_side = 'bids' if message[1] == 1 else 'asks'
        # message[2] is the price and message[3] the size; message[1] is
        # only the side flag and must not be used for positioning.
        self._apply_change(book[change_side], change_side,
                           message[2], message[3])

        inside_bids = self._inside_levels(book['bids'], 'bids',
                                          self.INSIDE_DEPTH)
        inside_asks = self._inside_levels(book['asks'], 'asks',
                                          self.INSIDE_DEPTH)
        inside_order_book = {"bids": inside_bids, "asks": inside_asks}

        if self.inside_order_books.get(pair) != inside_order_book:
            row = {
                "server_datetime": self._timestamp(),
                "product_id": pair
            }
            row.update(inside_bids)
            row.update(inside_asks)

            self.db.insert_into("polo_order_book", row)

            self.inside_order_books[pair] = inside_order_book
            print(row)

    def _handle_trade(self, channel_id, sequence, message):
        """Store ``['t', trade_id, side, price, amount, timestamp]`` as a trade row.

        ``side`` is 0 for sell and anything else for buy.  Gaps in the trade
        id sequence are only reported, not back-filled.
        """
        pair = self.product_codes.get(channel_id)
        if pair is None:
            # The pair for this channel is not known yet; the row could not
            # be labelled with a product id.
            return

        trade = {
            "server_datetime": self._timestamp(),
            "exchange_datetime": datetime.datetime.fromtimestamp(
                message[5]).strftime("%Y-%m-%dT%H:%M:%S.%f%Z"),
            "sequence": sequence,
            "trade_id": message[1],
            "product_id": pair,
            'price': message[3],
            'volume': message[4],
            'side': 'sell' if message[2] == 0 else 'buy',
            'backfilled': 'False'
        }

        current_trade_id = int(message[1])
        previous = self.last_trade_ids.get(pair)
        last_trade_id = int(previous) if previous else current_trade_id
        self.last_trade_ids[pair] = message[1]
        if current_trade_id > (last_trade_id + 1):
            missing_trade_ids = list(range(last_trade_id + 1, current_trade_id))
            print("missed the following trades: " + str(missing_trade_ids))

        self.db.insert_into("polo_trades", trade)
        print(trade)

    def on_error(self, ws, error):
        """Print an error reported for the websocket."""
        print(error)

    def on_open(self, ws):
        """Subscribe to the channel of every configured product."""
        for x in self.product_ids:
            ws.send(json.dumps({'command': 'subscribe', 'channel': x}))

    def run(self):
        """Run the websocket loop until it stops or the user interrupts it."""
        try:
            self.ws.run_forever()
        except KeyboardInterrupt:
            sys.exit()
        except Exception as exc:
            # Still best-effort, but report the failure instead of hiding it.
            self.on_error(self.ws, exc)
Beispiel #38
0
# Column list shared by every token-level query.
_TOKEN_SELECT = 'SELECT DISTINCT sent_id, num'

# FROM clauses of the token-level queries.  Each one ends in "WHERE 1 " and is
# followed directly by the condition fragments.
_FROM_MORPHOLOGY = ''' FROM  annotator_morphology
        LEFT JOIN annotator_token
        ON annotator_token.id = annotator_morphology.token_id
        WHERE 1 '''
_FROM_TOKEN = ''' FROM  annotator_token
        LEFT JOIN annotator_morphology
        ON annotator_token.id = annotator_morphology.token_id
        WHERE 1 '''
_FROM_TOKEN_WITH_ANNOTATION = ''' FROM  annotator_token
        LEFT JOIN annotator_morphology
        ON annotator_token.id = annotator_morphology.token_id
        LEFT JOIN annotator_annotation
        ON annotator_token.sent_id = annotator_annotation.document_id
        WHERE 1 '''

# Keeps only tokens whose position lies inside the joined annotation's span.
_IN_ANNOTATION_SPAN = ('AND num>= annotator_annotation.start '
                       'AND num <= annotator_annotation.end')

# Query used when the error tag is the only criterion (code '0001').
# NOTE(review): unlike every other query there is no "1" after WHERE, yet the
# same parse_gram(err, 'tag') fragment is placed right after "WHERE 1 " in the
# other queries.  One of the two forms looks wrong; left unchanged because
# parse_gram is not visible here -- confirm.
_ANNOTATION_ONLY_QUERY = '''SELECT DISTINCT document_id, start, end FROM annotator_annotation
                 LEFT JOIN annotator_sentence
                 ON annotator_annotation.document_id = annotator_sentence.id WHERE '''


def collect_full_data(arr):
    """Run the search described by ``arr`` and return the matching rows.

    ``arr`` unpacks into ``(word, lex, gram, err, docs, flag, page, per_page)``:

    * ``word`` -- matched as ``lem="<word>"``;
    * ``lex``, ``gram``, ``err`` -- turned into SQL condition fragments by
      ``parse_lex(lex)``, ``parse_gram(gram, 'gram')`` and
      ``parse_gram(err, 'tag')``; ``err`` is stripped first;
    * ``docs`` -- strings joined with ``,`` into an ``IN (...)`` restriction,
      applied only when ``flag`` is truthy;
    * ``page``, ``per_page`` -- unpacked but unused; no paging is applied.

    ``bincode(word, lex, gram, err)`` picks the query.  The branches of the
    original chain show it is read as four '0'/'1' characters in the order
    word, lex, gram, err.  Any value that is not such a code is handled like
    '1111', which is where the original ``else`` branch sent it.

    Returns ``(rows, 0, 0)``: ``rows`` is the result of ``db.execute`` (pairs
    of ``sent_id, num``; for code '0001' triples of ``document_id, start,
    end``).  The two zeros are placeholders -- no counts are computed.  Code
    '0000' returns ``([], 0, 0)`` without running a query.

    Compared with the former if/elif chain, the count/id query strings that
    were built but never executed are gone, and the document filter is always
    separated from the preceding fragment by a space.  Apart from whitespace
    between tokens, each query is the same as before.
    """
    db = Database()
    word, lex, gram, err, docs, flag, _page, _per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000':
        return [], 0, 0

    if isinstance(s, str) and len(s) == 4 and set(s) <= {'0', '1'}:
        has_word, has_lex, has_gram, has_err = (bit == '1' for bit in s)
    else:
        # Unrecognised codes used to fall through to the all-criteria branch.
        has_word = has_lex = has_gram = has_err = True

    # Fragments are produced in the same order as before: tag, lex, gram.
    tag_cond = parse_gram(err, 'tag') if has_err else None
    lex_cond = parse_lex(lex) if has_lex else None
    gram_cond = parse_gram(gram, 'gram') if has_gram else None

    if s == '0001':
        req = _ANNOTATION_ONLY_QUERY + tag_cond
        doc_column = 'doc_id_id'
    else:
        if has_err:
            from_clause = _FROM_TOKEN_WITH_ANNOTATION
        elif has_word and has_lex and has_gram:
            # This one combination selects from annotator_token first; kept
            # exactly as the original query set had it.
            from_clause = _FROM_TOKEN
        else:
            from_clause = _FROM_MORPHOLOGY

        conditions = []
        if has_word:
            # NOTE(review): `word` is interpolated unescaped.  If it can come
            # from user input this is injectable; whether Database.execute
            # accepts bound parameters is not visible here, so this is only
            # flagged, not changed.
            conditions.append('AND lem="%s"' % (word,))
            if has_err:
                conditions.extend((_IN_ANNOTATION_SPAN, tag_cond))
        elif has_err:
            # Without a lemma the tag condition precedes the span check,
            # mirroring the original fragment order.
            conditions.extend((tag_cond, _IN_ANNOTATION_SPAN))
        if has_lex:
            conditions.append(lex_cond)
        if has_gram:
            conditions.append(gram_cond)

        req = _TOKEN_SELECT + from_clause + ' '.join(conditions)
        doc_column = 'doc_id'

    if flag:
        # The leading space keeps "AND" from fusing with the last fragment.
        # NOTE(review): `docs` is joined into the SQL text as-is (see above).
        req += ' AND ' + doc_column + ' IN (' + ','.join(docs) + ')'

    return db.execute(req), 0, 0
Beispiel #39
0
 def __init__(self, lang):
     """Store ``lang`` and a database handle, then set up the inline handler.

     ``self.db_manager`` is whatever ``Database.get_instance()`` returns.
     ``InlineQueryHandler.__init__`` is called explicitly on ``self`` with
     ``self.inline_query`` as the callback and ``pass_chat_data=True``.
     """
     self.lang = lang
     self.db_manager = Database.get_instance()
     InlineQueryHandler.__init__(self,
                                 self.inline_query,
                                 pass_chat_data=True)