def exact_search(word, docs, flag, expand, page, per_page):
    db = Database()
    word = word.split()[0]  # only the first token of the query is used here
    req1 = 'SELECT COUNT(DISTINCT doc_id) FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        req1 += 'AND doc_id IN (' + ','.join(docs) + ');'
    docs_len = int(db.execute(req1)[0][0])
    n_req = 'SELECT COUNT(DISTINCT sent_id) FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        n_req += 'AND doc_id IN (' + ','.join(docs) + ');'
    sent_num = int(db.execute(n_req)[0][0])
    req2 = 'SELECT DISTINCT sent_id FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        req2 += 'AND doc_id IN (' + ','.join(docs) + ')'
    req2 += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)
    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req2)]) + ')'
    if sentences != '()':
        req3 = ('SELECT sent_id, num FROM `annotator_token` WHERE token="' + word +
                '" AND sent_id IN ' + sentences)
        tokens = db.execute(req3)
    else:
        tokens = []
    # tokens = Token.objects.filter(token__exact=word)
    e = defaultdict(list)
    for sent_id, num in tokens:
        e[sent_id].append(num)
    jq = []
    sent_list = [ShowSentence(i, e[i], expand) for i in sorted(e)]
    ShowSentence.empty()
    for sent in sent_list:
        jq.append(jquery.replace('***', str(sent.id)))
    return jq, sent_list, word, docs_len, sent_num
def exact_full_search(word, docs, flag, expand, page, per_page):
    db = Database()
    s = word
    words = word.split(' ')
    jq = []
    a = {}
    for wn in range(len(words)):
        w = words[wn]
        req3 = 'SELECT sent_id, num FROM `annotator_token` WHERE token="' + w + '" '
        if flag:
            req3 += 'AND doc_id IN (' + ','.join(docs) + ')'
        rows = db.execute(req3)
        e = defaultdict(list)
        if rows:
            for i, j in rows:
                e[i].append(j)
        if not a:
            a = SentBag(e, len(words))
        else:
            fr, t = wn, wn
            a.update(e, fr, t)
    a = a.finalize(len(words))
    sent_list = [ShowSentence(i, a[i], expand) for i in sorted(a)]
    ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(i.doc_id for i in sent_list))
    sent_list = sorted(sent_list, key=lambda i: i.id)[per_page * (page - 1):per_page * page]
    for sent in sent_list:
        jq.append(jquery.replace('***', str(sent.id)))
    return jq, sent_list, s, d_num, sent_num
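Both search helpers above splice the query word and the document id list straight into the SQL string, so a quote character in the input breaks the query and opens it to injection. Below is a minimal sketch of the first count query rewritten with driver-side placeholders; it assumes, as the friendship helpers further down already demonstrate, that `Database.execute(sql, params)` can forward a parameter tuple (for the annotator `Database` class this is an assumption, not a documented API, and `count_docs_with_token` is a hypothetical helper name):

    def count_docs_with_token(db, word, docs, flag):
        # same count as the first query in exact_search, but parameterized
        sql = 'SELECT COUNT(DISTINCT doc_id) FROM `annotator_token` WHERE token = %s'
        params = [word]
        if flag:
            # one placeholder per document id instead of joining raw strings
            sql += ' AND doc_id IN (%s)' % ', '.join(['%s'] * len(docs))
            params.extend(docs)
        return int(db.execute(sql, tuple(params))[0][0])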
def get_subcorpus(query):
    req = 'SELECT id FROM `annotator_document` WHERE 1 '  # AND subcorpus NOT LIKE "hidden"
    if u'rulec' in query:
        req += 'AND subcorpus="RULEC" '
    mode = query.get(u'mode').encode('utf-8')
    if mode != u'any':
        req += 'AND mode="' + mode + '" '
    background = query.get(u'background').encode('utf-8')
    if background != u'any':
        req += 'AND language_background="' + background + '" '
    gender = query.get(u'gender').encode('utf-8')
    if gender != u'any':
        req += 'AND gender="' + gender + '" '
    date1 = query.get(u'date1')
    if date1 != u'':
        req += 'AND date1>=' + date1.encode('utf-8') + ' '
    date2 = query.get(u'date2')
    if date2 != u'':
        req += 'AND date2<=' + date2.encode('utf-8') + ' '
    language = query.getlist(u'language[]')
    if language != []:
        one = []
        for lang in language:
            one.append('native="' + lang.encode('utf-8') + '"')
        if len(one) == 1:
            req += 'AND ' + one[0]
        else:
            req += 'AND (' + ' OR '.join(one) + ')'
    db = Database()
    docs = [str(i[0]) for i in db.execute(req)]
    subsum = db.execute('SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
                        'WHERE id IN (' + req + ')')
    flag = req != 'SELECT id FROM `annotator_document` WHERE 1 '
    return docs, subsum[0][0], subsum[0][1], flag
def exact_search(word, docs, flag, expand, page, per_page):
    db = Database()
    # db.cur.execute('SELECT tok.sent_id, tok.doc_id, sent.text FROM `annotator_token` tok, `annotator_sentence` sent WHERE tok.token="дом" and tok.sent_id=sent.id;')
    req1 = 'SELECT COUNT(DISTINCT doc_id) FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        req1 += 'AND doc_id IN (' + ','.join(docs) + ');'
    docs_len = int(db.execute(req1)[0][0])
    n_req = 'SELECT COUNT(DISTINCT sent_id) FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        n_req += 'AND doc_id IN (' + ','.join(docs) + ');'
    sent_num = int(db.execute(n_req)[0][0])
    req2 = 'SELECT DISTINCT sent_id FROM `annotator_token` WHERE token="' + word + '" '
    if flag:
        req2 += 'AND doc_id IN (' + ','.join(docs) + ')'
    req2 += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)
    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req2)]) + ')'
    # note: unlike the variant above, this version does not guard against an
    # empty IN () list, which is a MySQL syntax error when nothing matched
    req3 = ('SELECT sent_id, num FROM `annotator_token` WHERE token="' + word +
            '" AND sent_id IN ' + sentences)
    tokens = db.execute(req3)
    # tokens = Token.objects.filter(token__exact=word)
    e = defaultdict(list)
    for i, j in tokens:
        e[i].append(j)
    jq = []
    sent_list = [ShowSentence(i, e[i], expand) for i in e]
    for sent in sent_list:
        jq.append(jquery.replace('***', str(sent.id)))
    return jq, sent_list, word, docs_len, sent_num
def get_subcorpus(query):
    req = 'SELECT id FROM `annotator_document` WHERE 1 '
    if u'checked' in query:
        req += 'AND checked=True '
    if u'annotated' in query:
        req += 'AND annotated=True '
    gender = query.get(u'gender').encode('utf-8')
    if gender != u'any':
        req += 'AND gender="' + gender + '" '
    date1 = query.get(u'date1')
    if date1 != u'':
        req += 'AND date1>=' + date1 + ' '
    date2 = query.get(u'date2')
    if date2 != u'':
        req += 'AND date2<=' + date2 + ' '
    genre = query.getlist(u'genre[]')
    if genre != []:
        req += make_small_query(genre, 'genre')
    major = query.getlist(u'major[]')
    if major != []:
        req += make_small_query(major, 'major')
    course = query.getlist(u'course[]')
    if course != []:
        req += make_small_query(course, 'course')
    db = Database()
    docs = [str(i[0]) for i in db.execute(req)]
    subsum = db.execute('SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
                        'WHERE id IN (' + req + ')')
    flag = req != 'SELECT id FROM `annotator_document` WHERE 1 '
    return docs, subsum[0][0], subsum[0][1], flag
async def startup(self):
    self.db = Database(DATABASE['GDAX'], migrate=False)
    self.polo_db = Database(DATABASE['POLO'], migrate=False)
    # migrations are deferred from the constructors and always run here
    self.migrate = True
    if self.migrate:
        await self.db.migrate()
        await self.polo_db.migrate()
def download_file(request, doc_id, doc_type):
    db = Database()
    if doc_type == 'ann':
        req = ("SELECT `username`, `data`, `tag`, `start`, `end` "
               "FROM `annotator_annotation` "
               "LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id "
               "WHERE `document_id` in "
               "(SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%s)" % doc_id)
        # CSV header: Annotator, Error, Correction, Tag, Error start, Error end
        # (start/end are word indexes from the beginning of the sentence)
        text = u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n'
        rows = db.execute(req)
        for row in rows:
            data = json.loads(row[1])
            text += '\t'.join([str(row[0]), data['quote'], data['corrs'],
                               row[2], str(row[3]), str(row[4])]) + '\r\n'
        response = HttpResponse(text, content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = 'attachment; filename="annotation_text_%s.csv"' % doc_id
        return response
    elif doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%s" % doc_id
        text = ' '.join(h.unescape(i[0]).encode('cp1251') for i in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        response['Content-Disposition'] = 'filename="text_%s.txt"' % doc_id
        return response
    else:
        req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%s" % doc_id
        # CSV header: sentence id in the database, word, word index in the
        # sentence, tags, correction, annotator
        rows = (u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n'
                + u'\r\n'.join(u'\t'.join([str(row[2]), row[0], str(row[1]), '', '', ''])
                               for row in db.execute(req)))
        response = HttpResponse(rows, content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="tokens_text_%s.txt"' % doc_id
        return response
def complex_search(age, city, f_surname):
    db = Database()
    # the original used HAVING without GROUP BY here; a plain AND filter is
    # the intended (and portable) WHERE condition
    res = db.execute('''SELECT f_name, f_surname, f_age, f_city
                        FROM friendship
                        WHERE f_city = %s
                        AND friend_1 IN
                        (SELECT id FROM user_info WHERE user_surname = %s)
                        AND f_age > %s''', (city, f_surname, age))
    return res
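A short usage sketch for `complex_search`; the table and column names come from the snippets below, while the literal values are made up for illustration:

    # friends living in Moscow, older than 30, whose user has surname Ivanov
    for f_name, f_surname, f_age, f_city in complex_search(30, 'Moscow', 'Ivanov'):
        print(f_name, f_surname, f_age, f_city)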
def get_subcorpus(query):
    req = 'SELECT id FROM `annotator_document` WHERE 1 '  # AND subcorpus NOT LIKE "hidden"
    if u'rulec' in query:
        req += 'AND subcorpus="RULEC" '
    mode = query.get(u'mode').encode('utf-8')
    if mode != u'any':
        req += 'AND mode="' + mode + '" '
    background = query.get(u'background').encode('utf-8')
    if background != u'any':
        req += 'AND language_background="' + background + '" '
    gender = query.get(u'gender').encode('utf-8')
    if gender != u'any':
        req += 'AND gender="' + gender + '" '
    date1 = query.get(u'date1')
    if date1 != u'':
        req += 'AND date1>=' + date1.encode('utf-8') + ' '
    date2 = query.get(u'date2')
    if date2 != u'':
        req += 'AND date2<=' + date2.encode('utf-8') + ' '
    language = query.getlist(u'language[]')
    if language != []:
        one = []
        for lang in language:
            one.append('native="' + lang.encode('utf-8') + '"')
        if len(one) == 1:
            req += 'AND ' + one[0]
        else:
            req += 'AND (' + ' OR '.join(one) + ')'
    glevel = query.getlist(u'generallevel[]')
    if glevel != []:
        one = []
        for l in glevel:
            one.append('general_level="' + l.encode('utf-8') + '"')
        if len(one) == 1:
            req += 'AND ' + one[0]
        else:
            req += 'AND (' + ' OR '.join(one) + ')'
    level = query.getlist(u'level[]')
    if level != []:
        one = []
        for l in level:
            one.append('level="' + l.encode('utf-8') + '"')
        if len(one) == 1:
            req += 'AND ' + one[0]
        else:
            req += 'AND (' + ' OR '.join(one) + ')'
    db = Database()
    docs = [str(i[0]) for i in db.execute(req)]
    num_docs = Document.objects.count()
    subsum = db.execute('SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
                        'WHERE id IN (' + req + ')')
    flag = num_docs != len(docs)
    return docs, subsum[0][0], subsum[0][1], flag
def get_orig_sent(doc_id, num):
    db = Database()
    req = ('SELECT text FROM `annotator_originalsentence` '
           'WHERE doc_id_id={} AND num={}'.format(doc_id, num))
    orig_sent = db.execute(req)[0]
    return orig_sent[0]
def view_all():
    out = []
    db = Database()
    res = db.execute('SELECT * FROM user_info JOIN friendship', 0)
    for el in res:
        out.append([el[1], el[2], el[3], el[4]])
        out.append([el[7], el[8], el[9], el[10]])
    unique = []
    for el in out:
        if el not in unique:
            unique.append(el)
    return unique
def __init__(self):
    if CronJobManager.__instance is not None:
        raise Exception("This class is a singleton!")
    else:
        self.update_cron_start_time()
        self.db_manager = Database.get_instance()
        self.dmm_ripper = DMMRipper.get_instance(CronJobManager.webdriver_config)
        jobstores = {
            # 'alchemy': SQLAlchemyJobStore(url='sqlite:///jobs.sqlite'),
            'default': MemoryJobStore()
        }
        executors = {
            'default': {'type': 'threadpool', 'max_workers': 20},
            'processpool': ProcessPoolExecutor(max_workers=5)
        }
        job_defaults = {'coalesce': False, 'max_instances': 3}
        self.scheduler = BackgroundScheduler()
        self.scheduler.configure(jobstores=jobstores,
                                 executors=executors,
                                 job_defaults=job_defaults,
                                 timezone=CronJobManager.time_zone,
                                 daemon=False)
        self.scheduler.start()
        CronJobManager.__instance = self
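The constructor above is the guard half of the singleton pattern; the matching accessor, referenced elsewhere in these snippets as `CronJobManager.get_instance()`, is not shown. A sketch of what it plausibly looks like (the real body may differ, e.g. it may also pass configuration):

    @staticmethod
    def get_instance():
        # lazily create the singleton on first access
        if CronJobManager.__instance is None:
            CronJobManager()
        return CronJobManager.__instance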
def __init__(self, max_download_size, download_path, lang, initial_state):
    self.db_manager = Database.get_instance()
    self.scheduler = CronJobManager.get_instance()
    self.logger = logging.getLogger(__name__)
    self.download_path = utils.get_abs_path(download_path)
    self.max_download_size = max_download_size
    self.lang = lang
    self.initial_state = initial_state
    self.PROCESS_PASSWORD = range(
        initial_state, initial_state + BookDownloadHandler.num_states)
    self.entry_points = [CallbackQueryHandler(self.callback, pass_user_data=True)]
    self.states = {
        self.PROCESS_PASSWORD: [RegexHandler('.*', self.process_password,
                                             pass_user_data=True)]
    }
    self.fallbacks = [RegexHandler('3248BC7547CE97B2A197B2A06CF7283D', self.cancel)]
    ConversationHandler.__init__(self,
                                 entry_points=self.entry_points,
                                 states=self.states,
                                 fallbacks=self.fallbacks,
                                 per_chat=False)
def orig_exact_search(word, docs, flag, expand, page, per_page):
    db = Database()
    s = word
    words = word.split(' ')
    jq = []
    sent_list = {}
    for wn in range(len(words)):
        w = words[wn]
        req4 = ('SELECT doc_id_id, num, text FROM `annotator_originalsentence` '
                'WHERE text REGEXP "' + w + '" ')
        if flag:
            req4 += 'AND doc_id_id IN (' + ','.join(docs) + ')'
        rows = db.execute(req4)
        # note: sent_list is rebuilt on every pass, so only the matches for
        # the last word of the query survive the loop
        sent_list = {}
        if rows:
            for sent in rows:
                sent_list[sent] = ShowSentence1(sent[0], sent[1], expand)
        ShowSentence.empty()
    sent_num = len(sent_list)
    d_num = len(set(sent[0] for sent in sent_list))
    for sent in sent_list:
        jq.append(jquery.replace('***', str(sent[1])))
    return jq, sent_list, s, d_num, sent_num
def collect_data(arr):
    word, lex, gram, err, docs, flag = arr
    if all(i == "" for i in [word, lex, gram, err]):
        return []
    if [word, lex, gram] == ["", "", ""] and err != '':
        req = '''SELECT DISTINCT document_id, start, end
        FROM annotator_annotation
        LEFT JOIN annotator_sentence
        ON annotator_annotation.document_id = annotator_sentence.id WHERE 1 '''
        errs = [i for i in re.split(r':?,|\||\(|\)', err.lower()) if i != '']
        for er in errs:
            req += 'AND tag REGEXP "[[:<:]]' + er + '[[:>:]]" '
        if flag:
            req += 'AND doc_id_id IN (' + ','.join(docs) + ');'
    else:
        if err != '':
            req = '''SELECT DISTINCT sent_id, num
            FROM annotator_token
            LEFT JOIN annotator_morphology
            ON annotator_token.id = annotator_morphology.token_id
            LEFT JOIN annotator_annotation
            ON annotator_token.sent_id = annotator_annotation.document_id WHERE 1 '''
            errs = [i for i in re.split(r':?,|\||\(|\)', err.lower()) if i != '']
            for er in errs:
                req += 'AND tag LIKE "%' + er + '%" '
            req += 'AND num>= annotator_annotation.start AND num <= annotator_annotation.end '
        else:
            req = '''SELECT DISTINCT sent_id, num
            FROM annotator_token
            LEFT JOIN annotator_morphology
            ON annotator_token.id = annotator_morphology.token_id WHERE 1 '''
        if word != '':
            req += 'AND lem="' + word + '" '
        if lex != '':
            req += 'AND lex LIKE "%' + lex + '%" '
        if gram != '':
            req += parse_gram(gram)
        if flag:
            req += 'AND doc_id IN (' + ','.join(docs) + ');'
    db = Database()
    rows = db.execute(req)
    return rows
def __init__(self):
    self.url = "wss://ws-feed.gdax.com"
    self.public_client = ccxt.gdax()
    self.product_ids = GDAX_PRODUCT_IDS
    self.order_books = {x: {} for x in self.product_ids}
    self.inside_order_books = {
        x: {"bids": {}, "asks": {}} for x in self.product_ids
    }
    self.last_trade_ids = {x: None for x in self.product_ids}
    self.db = Database(DATABASE['GDAX'], migrate=True)
    self.ws = websocket.WebSocketApp(
        self.url,
        on_message=self.on_message,
        on_error=self.on_error,
        on_open=self.on_open,
    )
def search(parameter, value):
    db = Database()
    out = []
    res = 0
    if parameter == 'name':
        res = db.execute('''SELECT * FROM user_info
                            JOIN friendship ON (user_name = %s OR f_name = %s)''',
                         (value, value))
    elif parameter == 'surname':
        res = db.execute('''SELECT * FROM user_info
                            JOIN friendship ON (user_surname = %s OR f_surname = %s)''',
                         (value, value))
    elif parameter == 'city':
        res = db.execute('''SELECT * FROM user_info
                            JOIN friendship ON (user_city = %s OR f_city = %s)''',
                         (value, value))
    elif parameter == 'age':
        res = db.execute('''SELECT * FROM user_info
                            JOIN friendship ON (user_age = %s OR f_age = %s)''',
                         (value, value))
    for el in res:
        out.append([el[1], el[2], el[3], el[4]])
        out.append([el[7], el[8], el[9], el[10]])
    if parameter == 'age':
        out = [el for el in out if int(value) in el]
    else:
        out = [el for el in out if value in el]
    unique = []
    for el in out:
        if el not in unique:
            unique.append(el)
    return unique
def insert_user_info(name, surname, city, user_age):
    db = Database()
    db.execute('''
        INSERT INTO user_info (user_name, user_surname, user_city, user_age)
        VALUES (%s, %s, %s, %s)
        ''', (name, surname, city, user_age))
    db.commit()
def get_subcorpus(query):
    req = 'SELECT id FROM `annotator_document` WHERE 1 '  # AND subcorpus NOT LIKE "hidden"
    # The subcorpus filters used by the other get_subcorpus variants (rulec,
    # mode, background, gender, dates, native language) are disabled here, so
    # req is never extended and flag is always False.
    db = Database()
    docs = [str(i[0]) for i in db.execute(req)]
    subsum = db.execute('SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
                        'WHERE id IN (' + req + ')')
    flag = req != 'SELECT id FROM `annotator_document` WHERE 1 '
    return docs, subsum[0][0], subsum[0][1], flag
def insert_friend_info(user_name, user_surname, user_city, user_age,
                       name, surname, city, age):
    db = Database()
    res = db.execute('''
        SELECT id FROM user_info
        WHERE user_name = %s AND user_surname = %s
        AND user_city = %s AND user_age = %s
        ''', (user_name, user_surname, user_city, user_age))
    try:
        user_id = res[0][0]
        db.execute('''
            INSERT INTO friendship (friend_1, f_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s, %s)
            ''', (user_id, name, surname, city, age))
    except IndexError:
        # no matching user: store the friend without a friend_1 reference
        # (the original column list said friend_name, which does not exist)
        db.execute('''
            INSERT INTO friendship (f_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s)
            ''', (name, surname, city, age))
    db.commit()
def __init__(self, lang, language_codes, initial_state):
    self.db_manager = Database.get_instance()
    self.scheduler = CronJobManager.get_instance()
    self.logger = logging.getLogger(__name__)
    self.lang = lang
    self.language_codes = language_codes
    self.initial_state = initial_state
    self.LANGUAGE, self.EMAIL, self.PASSWORD, self.STORE_PASS = range(
        initial_state, initial_state + StartWizard.num_states)
    self.entry_points = [CommandHandler('start', self.start)]
    self.states = {
        self.LANGUAGE: [RegexHandler('.*', self.language)],
        self.EMAIL: [RegexHandler('.*', self.email)],
        self.STORE_PASS: [RegexHandler('.*', self.save_credentials)],
        self.PASSWORD: [RegexHandler('.*', self.password)],
    }
    self.fallbacks = [
        CommandHandler('10aec35353f9c4096a71c38654c3d402', self.cancel)
    ]
    ConversationHandler.__init__(self,
                                 entry_points=self.entry_points,
                                 states=self.states,
                                 fallbacks=self.fallbacks)
def collect_full_data(arr):
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    # bincode encodes which of word, lex, gram, err are non-empty as four bits
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0
    if s == '0001':
        # error tag only: query the annotations joined to their sentences
        req_template = (' FROM annotator_annotation LEFT JOIN annotator_sentence '
                        'ON annotator_annotation.document_id = annotator_sentence.id '
                        'WHERE 1 ') + parse_gram(err, 'tag')
        if flag:
            req_template += ' AND doc_id_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT document_id)' + req_template
        req1 = 'SELECT DISTINCT document_id' + req_template
        req = 'SELECT DISTINCT document_id, start, end' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id_id)' + req_template
    else:
        # every other combination searches tokens; an error tag additionally
        # joins the annotations and restricts num to the annotated span
        if err:
            req_template = (' FROM annotator_token LEFT JOIN annotator_morphology '
                            'ON annotator_token.id = annotator_morphology.token_id '
                            'LEFT JOIN annotator_annotation '
                            'ON annotator_token.sent_id = annotator_annotation.document_id '
                            'WHERE 1 ')
        else:
            req_template = (' FROM annotator_morphology LEFT JOIN annotator_token '
                            'ON annotator_token.id = annotator_morphology.token_id '
                            'WHERE 1 ')
        if word:
            req_template += 'AND lem="%s" ' % word
        if err:
            req_template += parse_gram(err, 'tag')
            req_template += (' AND num>= annotator_annotation.start '
                             'AND num <= annotator_annotation.end ')
        if lex:
            req_template += ' ' + parse_lex(lex)
        if gram:
            req_template += ' ' + parse_gram(gram, 'gram')
        if flag:
            req_template += ' AND doc_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT sent_id)' + req_template
        req1 = 'SELECT DISTINCT sent_id' + req_template
        req = 'SELECT DISTINCT sent_id, num' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id)' + req_template
    # only req is executed in this variant; the count queries are left unused
    rows = db.execute(req)
    return rows, 0, 0
def view_registered():
    db = Database()
    res = db.execute('''SELECT * FROM user_info''', 0)
    return res
import sys
import csv
import sqlite3

from db_utils import Database
from config import DATABASE

db = Database(DATABASE['GDAX'], row_factory=sqlite3.Row)
data = db._execute("SELECT * FROM gdax_order_book", {}, fetch=True)
titles = data[0].keys()  # the column names double as the CSV header row
mode = "wb" if sys.version_info < (3,) else "w"
with open('order_books.csv', mode) as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(titles)
    writer.writerows(data)
def download_book_pages_job(book_path, missing_images, start_toc_missing, book):
    db_manager = Database.get_instance()
    db_session = db_manager.create_session()
    dmm_cookies = None
    CronJobManager.logger.info('Starting download job of book %s', book.id)
    db_manager.set_volume_now_downloading(db_session, book.id, True)
    num_missing_images = len(missing_images)
    CronJobManager.notify_subscribers_download_progress(
        book, book.pages - num_missing_images, start_toc_missing,
        start_toc_missing=start_toc_missing)
    dmm_cookies = CronJobManager.get_dmm_cookies_for_book_download(book)
    download_failed = False
    if dmm_cookies:
        toc_path = path.join(book_path, 'toc.txt')
        if start_toc_missing:
            try:
                CronJobManager.__instance.dmm_ripper.download_book_toc(
                    book, toc_path)
                is_toc_missing = False
                CronJobManager.notify_subscribers_download_progress(
                    book, book.pages - num_missing_images, is_toc_missing,
                    edit_message=True)
            except Exception as e:
                CronJobManager.logger.exception(e)
                CronJobManager.__instance.dmm_ripper.close_broser_reader()
                is_toc_missing = True
        else:
            is_toc_missing = False
        for index, page_num in enumerate(missing_images):
            try:
                CronJobManager.__instance.dmm_ripper.download_book_page(
                    book, page_num,
                    path.join(book_path, '{}'.format(page_num)))
            except Exception:
                CronJobManager.__instance.dmm_ripper.close_broser_reader()
                download_failed = True
            CronJobManager.notify_subscribers_download_progress(
                book, book.pages - num_missing_images + index + 1,
                is_toc_missing, start_toc_missing=start_toc_missing,
                edit_message=True)
        CronJobManager.__instance.dmm_ripper.close_broser_reader()
        CronJobManager.logger.info('Download of book %s has finished', book.id)
        for subscriber in CronJobManager.book_job[book.id]['download']:
            user = subscriber['user']
            subscriber['bot'].send_message(
                chat_id=user.id,
                text=CronJobManager.lang[user.language_code]
                ['download_finished'].format(
                    FileFormat(user.file_format).name.upper()))
            CronJobManager.__instance.subscribe_to_book_conversion(
                book, book_path, subscriber['user'], subscriber['bot'],
                from_download=True)
    if not dmm_cookies or download_failed:
        CronJobManager.logger.info('Unable to start the download of '
                                   'book %s', book.id)
        # iterate the download subscriber list (the original indexed the
        # job dict itself, which yields its keys rather than subscribers)
        for subscriber in CronJobManager.book_job[book.id]['download']:
            user = subscriber['user']
            CronJobManager.logger.info('Sending download error message '
                                       'to subscriber %s', user.id)
            subscriber['bot'].send_message(
                chat_id=user.id,
                text=CronJobManager.lang[user.language_code]['download_error'])
    db_manager.set_volume_now_downloading(db_session, book.id, False)
    CronJobManager.logger.info('Removing the registration of download '
                               'job for book %s', book.id)
    CronJobManager.book_job[book.id]['download'] = []
def collect_data(arr):
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    # bincode encodes which of word, lex, gram, err are non-empty as four bits
    s = bincode(word, lex, gram, err)
    if s == '0000':
        return []
    if s == '0001':
        # error tag only: query the annotations joined to their sentences
        req_template = (' FROM annotator_annotation LEFT JOIN annotator_sentence '
                        'ON annotator_annotation.document_id = annotator_sentence.id '
                        'WHERE 1 ') + parse_gram(err, 'tag')
        if flag:
            req_template += ' AND doc_id_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT document_id)' + req_template
        req1 = 'SELECT DISTINCT document_id' + req_template
        req = 'SELECT DISTINCT document_id, start, end' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id_id)' + req_template
    else:
        # every other combination searches tokens; an error tag additionally
        # joins the annotations and restricts num to the annotated span
        if err:
            req_template = (' FROM annotator_token LEFT JOIN annotator_morphology '
                            'ON annotator_token.id = annotator_morphology.token_id '
                            'LEFT JOIN annotator_annotation '
                            'ON annotator_token.sent_id = annotator_annotation.document_id '
                            'WHERE 1 ')
        else:
            req_template = (' FROM annotator_morphology LEFT JOIN annotator_token '
                            'ON annotator_token.id = annotator_morphology.token_id '
                            'WHERE 1 ')
        if word:
            req_template += 'AND lem="%s" ' % word
        if err:
            req_template += parse_gram(err, 'tag')
            req_template += (' AND num>= annotator_annotation.start '
                             'AND num <= annotator_annotation.end ')
        if lex:
            req_template += ' ' + parse_lex(lex)
        if gram:
            req_template += ' ' + parse_gram(gram, 'gram')
        if flag:
            req_template += ' AND doc_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT sent_id)' + req_template
        req1 = 'SELECT DISTINCT sent_id' + req_template
        req = 'SELECT DISTINCT sent_id, num' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id)' + req_template
    # page through distinct sentences first, then fetch the matching tokens
    req1 += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)
    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req1)]) + ')'
    if sentences == '()':
        return [], 0, 0
    if s == '0001':
        req += ' AND document_id IN ' + sentences
    else:
        req += ' AND sent_id IN ' + sentences
    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num
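The pagination above maps a 1-based page number onto MySQL's `LIMIT offset,count` form. A tiny standalone illustration of the offset arithmetic, with a hypothetical helper name:

    def limit_clause(page, per_page):
        # page 1 -> 'LIMIT 0,50'; page 3 with 20 per page -> 'LIMIT 40,20'
        return ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)

    assert limit_clause(1, 50) == ' LIMIT 0,50;'
    assert limit_clause(3, 20) == ' LIMIT 40,20;'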
parser.add_argument("--epochs", type=int, default=10, help="Number of epochs during training") parser.add_argument("--classifier", type=str, default='SVM', choices={'NN', 'SVM'}, help="Downstream Classifier") parser.add_argument("--seed", type=int, default=0, help="Random Seed") args = parser.parse_args() np.random.seed(args.seed) torch.manual_seed(args.seed) data_path = f'Datasets/{args.data_name}' db = Database.load_csv(data_path) model_dir = f'models/{args.data_name}/{args.kernel}_{args.depth}_{args.dim}_{args.num_samples}_{args.epochs}_{args.batch_size}_{args.seed}' os.makedirs(model_dir, exist_ok=True) sample_fct = ek_utlis.ek_sample_fct if args.kernel == 'EK' else mmd_utils.mmd_sample_fct Y, rows = db.get_labels() scores = [] split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10) for i, (train_index, test_index) in enumerate(split.split(rows, Y)): samples = get_samples(db, args.depth, args.num_samples, sample_fct) row_idx = {r: i for i, r in enumerate(rows)} scheme_idx = {s: i for i, s in enumerate(samples.keys())}
def cache_user_library(user, session=None, password=None, fast=False):
    db_manager = Database.get_instance()
    db_session = db_manager.create_session()
    db_manager.set_user_now_caching(db_session, user.id, True)
    CronJobManager.logger.info('Caching %s user\'s library', user.id)
    try:
        if session is None:
            if user.save_credentials:
                password = user.password
            session = CronJobManager.__instance.dmm_ripper.get_session(
                user.email, password, fast)
            CronJobManager.logger.info('Obtaining a new DMM session for '
                                       'user %s', user.id)
        books = CronJobManager.__instance.dmm_ripper.get_purchased_books(session)
        db_session.add(user)
        for book in books:
            if book['series']:
                serie = db_manager.get_manga_serie(db_session, book['url'])
                if not serie:
                    serie = MangaSeries(title=book['name'], url=book['url'],
                                        thumbnail_dmm=book['thumbnail'])
                    db_session.add(serie)
                    CronJobManager.logger.info('Adding a new serie to DB: %s',
                                               serie.title)
                    CronJobManager.thumbnail(db_session, serie, db_manager)
                CronJobManager.logger.info('Processing volumes of series %s',
                                           serie.title)
                volumes = CronJobManager.__instance.dmm_ripper.get_book_volumes(
                    session, book)
                for volume in volumes:
                    db_volume = db_manager.get_manga_volume(db_session,
                                                            volume['url'])
                    if not db_volume:
                        volume_details = CronJobManager.__instance.dmm_ripper \
                            .get_book_details(session, volume['details_url'])
                        db_volume = Manga(
                            title=volume['name'],
                            url=volume['url'],
                            thumbnail_dmm=volume['thumbnail'],
                            description=volume_details['description'],
                            pages=volume_details['pages'],
                            serie=serie)
                        db_session.add(db_volume)
                        CronJobManager.logger.info('Adding a new volume '
                                                   'to DB: %s', db_volume.title)
                        CronJobManager.thumbnail(db_session, db_volume,
                                                 db_manager, parent=serie)
                    if not db_manager.user_owns_volume(db_session, user.id,
                                                       db_volume.url):
                        CronJobManager.logger.info('Adding volume to '
                                                   'user %s', user.id)
                        try:
                            user.book_collection.append(db_volume)
                            db_manager.commit(db_session)
                        except Exception:
                            CronJobManager.logger.exception(
                                'Error adding volume to user %s', user.id)
                            db_manager.rollback(db_session)
            else:
                # look the volume up under a new name so the original book
                # dict is still available when a record has to be created
                # (the original rebound `book` and then read fields from it)
                db_book = db_manager.get_manga_volume(db_session, book['url'])
                if not db_book:
                    book_details = CronJobManager.__instance.dmm_ripper \
                        .get_book_details(session, book['details_url'])
                    db_book = Manga(title=book['name'], url=book['url'],
                                    thumbnail_dmm=book['thumbnail'],
                                    description=book_details['description'],
                                    pages=book_details['pages'],
                                    serie=serie)
                    db_session.add(db_book)
                    CronJobManager.logger.info('Adding a new non series '
                                               'book to DB: %s', db_book.title)
                    CronJobManager.thumbnail(db_session, db_book, db_manager)
                if not db_manager.user_owns_volume(db_session, user.id,
                                                   db_book.url):
                    CronJobManager.logger.info('Adding non series book '
                                               'to user %s', user.id)
                    try:
                        user.book_collection.append(db_book)
                        # commit/rollback on the DB session, not the DMM session
                        db_manager.commit(db_session)
                    except Exception:
                        CronJobManager.logger.exception(
                            'Error adding non series book to user %s', user.id)
                        db_manager.rollback(db_session)
        db_manager.set_user_cache_expire_date(
            db_session, user.id, CronJobManager.get_cache_expire_date())
        db_manager.set_user_cache_built(db_session, user.id, True)
        db_manager.set_user_login_error(db_session, user.id, False)
    except Exception as e:
        CronJobManager.logger.info('Unable to login to the DMM account '
                                   'of user %s', user.id)
        db_manager.set_user_login_error(db_session, user.id, True)
        CronJobManager.remove_scheduled_user_cache(user.id)
        CronJobManager.logger.exception(e)
    finally:
        CronJobManager.logger.info('%s user\'s library caching ended', user.id)
        db_manager.set_user_now_caching(db_session, user.id, False)
        db_manager.remove_session()
def collect_data(arr):
    db = Database()
    # this variant also receives a comment field, which it does not use
    word, lex, gram, err, comment, docs, flag, page, per_page = arr
    err = err.strip()
    # bincode encodes which of word, lex, gram, err are non-empty as four bits
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0
    if s == '0001':
        # error tag only: query the annotations joined to their sentences
        req_template = (' FROM annotator_annotation LEFT JOIN annotator_sentence '
                        'ON annotator_annotation.document_id = annotator_sentence.id '
                        'WHERE 1 ') + parse_gram(err, 'tag')
        if flag:
            req_template += ' AND doc_id_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT document_id)' + req_template
        req1 = 'SELECT DISTINCT document_id' + req_template
        req = 'SELECT DISTINCT document_id, start, end' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id_id)' + req_template
    else:
        # every other combination searches tokens; an error tag additionally
        # joins the annotations and restricts num to the annotated span
        if err:
            req_template = (' FROM annotator_token LEFT JOIN annotator_morphology '
                            'ON annotator_token.id = annotator_morphology.token_id '
                            'LEFT JOIN annotator_annotation '
                            'ON annotator_token.sent_id = annotator_annotation.document_id '
                            'WHERE 1 ')
        else:
            req_template = (' FROM annotator_morphology LEFT JOIN annotator_token '
                            'ON annotator_token.id = annotator_morphology.token_id '
                            'WHERE 1 ')
        if word:
            req_template += 'AND lem="%s" ' % word
        if err:
            req_template += parse_gram(err, 'tag')
            req_template += (' AND num>= annotator_annotation.start '
                             'AND num <= annotator_annotation.end ')
        if lex:
            req_template += ' ' + parse_lex(lex)
        if gram:
            req_template += ' ' + parse_gram(gram, 'gram')
        if flag:
            req_template += ' AND doc_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT sent_id)' + req_template
        req1 = 'SELECT DISTINCT sent_id' + req_template
        req = 'SELECT DISTINCT sent_id, num' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id)' + req_template
    # page through distinct sentences first, then fetch the matching tokens
    req1 += ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page)
    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req1)]) + ')'
    if sentences == '()':
        return [], 0, 0
    if s == '0001':
        req += ' AND document_id IN ' + sentences
    else:
        req += ' AND sent_id IN ' + sentences
    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num
class DataFeed(): def __init__(self): self.url = "wss://ws-feed.gdax.com" self.public_client = gdax.PublicClient() self.product_ids = GDAX_PRODUCT_IDS self.order_books = {x: {} for x in self.product_ids} self.inside_order_books = { x: { "bids": {}, "asks": {} } for x in self.product_ids } self.last_trade_ids = {x: None for x in self.product_ids} self.db = Database(DATABASE['GDAX'], migrate=True) self.ws = websocket.WebSocketApp( self.url, on_message=self.on_message, on_error=self.on_error, on_open=self.on_open, ) def on_message(self, ws, msg): msg = json.loads(msg) product_id = msg['product_id'] if msg['type'] == 'snapshot': self.order_books[product_id] = { 'bids': msg['bids'], 'asks': msg['asks'] } if msg['type'] == 'l2update': changes = msg['changes'] for change in changes: change_side = 'bids' if change[0] == 'buy' else 'asks' change_price = float(change[1]) change_volume = float(change[2]) orders = self.order_books[product_id][change_side] level_index = [ i for i, order in enumerate(orders) if float(order[0]) == float(change[1]) ] if level_index: if float(change[2]) != 0: self.order_books[product_id][change_side][min( level_index)][1] = change[2] else: self.order_books[product_id][change_side].pop( min(level_index)) if not level_index: if change_side == 'bids': insert_indexes = [ i for i, order in enumerate(orders) if float(order[0]) >= float(change[1]) ] if change_side == 'asks': insert_indexes = [ i for i, order in enumerate(orders) if float(order[0]) <= float(change[1]) ] if not insert_indexes: insert_index = -1 else: insert_index = max(insert_indexes) self.order_books[product_id][change_side].insert( insert_index + 1, [change[1], change[2]]) inside_bids = { 'bids_' + str(x + 1): "@".join(self.order_books[product_id]['bids'][x][::-1]) for x in range(15) } inside_asks = { 'asks_' + str(x + 1): "@".join(self.order_books[product_id]['asks'][x][::-1]) for x in range(15) } inside_order_book = {"bids": inside_bids, "asks": inside_asks} if self.inside_order_books[product_id] != inside_order_book: row = { "server_datetime": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f%Z"), "product_id": product_id } row.update(inside_bids) row.update(inside_asks) self.db.insert_into("gdax_order_book", data=row) self.inside_order_books[product_id] = inside_order_book print(row) if msg['type'] == 'match': trades = [{ "server_datetime": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f%Z"), "exchange_datetime": msg['time'], "sequence": msg['sequence'], "trade_id": msg['trade_id'], "product_id": product_id, 'price': msg['price'], 'volume': msg['size'], 'side': msg['side'], 'backfilled': 'False' }] current_trade_id = int(msg["trade_id"]) if self.last_trade_ids[product_id]: last_trade_id = int(self.last_trade_ids[product_id]) else: last_trade_id = current_trade_id self.last_trade_ids[product_id] = msg["trade_id"] if current_trade_id > (last_trade_id + 1): missing_trade_ids = list( range(last_trade_id + 1, current_trade_id)) print("missed the following trades: " + str(missing_trade_ids)) product_trades = self.public_client.get_product_trades( product_id=product_id) for missing_trade_id in missing_trade_ids: missing_trade_index = [ i for i, product_trade in enumerate(product_trades) if int(product_trade['trade_id']) == missing_trade_id ][0] missing_product_trade = product_trades[missing_trade_index] missing_trade = { "server_datetime": datetime.datetime.now().strftime( "%Y-%m-%dT%H:%M:%S.%f%Z" ), #2017-10-15T05:10:53.700000Z "exchange_datetime": missing_product_trade['time'], "sequence": "None", 
"trade_id": missing_product_trade['trade_id'], "product_id": product_id, 'price': missing_product_trade['price'], 'volume': missing_product_trade['size'], 'side': missing_product_trade['side'], 'backfilled': 'True' } trades.append(missing_trade) for trade in trades: self.db.insert_into("gdax_trades", trade) print(trade) def on_error(self, ws, error): print(error) def on_open(self, ws): request = { "type": "subscribe", "product_ids": self.product_ids, "channels": ["level2", "matches"] } request = json.dumps(request) request = request.encode("utf-8") ws.send(request) def run(self): try: self.ws.run_forever() except KeyboardInterrupt: sys.exit() except Exception: pass
def compute_embedding(db):
    G = db.get_row_val_graph_reg()
    embedding, model = get_node2vec_embedding_new(G, epochs=5)
    return embedding, model


if __name__ == "__main__":
    name = 'mutagenesis'
    embedding_name = 'testn3.pckl'
    path = f'Datasets/{name}'
    embedding_path = f'Embeddings/{name}/{embedding_name}'

    db = Database.load_csv(path)
    Y, rows = db.get_labels()
    scores = []
    split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
    for i, (train_index, test_index) in enumerate(split.split(rows, Y)):
        # Embeddings are cached per split so reruns skip the node2vec step.
        embedding, _ = io_utils.load_or_compute(f'{embedding_path}_{i}',
                                                lambda: compute_embedding(db))
        X_train = np.float32([embedding[rows[j]] for j in train_index])
        X_test = np.float32([embedding[rows[j]] for j in test_index])
        Y_train, Y_test = [Y[i] for i in train_index], [Y[i] for i in test_index]
        clf = SVC(kernel='rbf', C=1.0)
        # (Assumed continuation; the excerpt stops here.) Fit the classifier
        # and record test accuracy for this split.
        clf.fit(X_train, Y_train)
        scores.append(clf.score(X_test, Y_test))
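# io_utils.load_or_compute is not shown in this excerpt; from its call site it
# appears to cache an expensive computation under a path and recompute only on
# a cache miss. A hypothetical pickle-based sketch of such a helper:
import os
import pickle

def load_or_compute(path, compute_fn):
    """Return the pickled value at `path`, computing and caching it if absent."""
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)
    value = compute_fn()
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(value, f)
    return value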
def make_tables():
    db = Database()
    db.execute("DROP TABLE IF EXISTS word_info;", 0)
    db.execute(
        """CREATE TABLE word_info
        (word_id INTEGER PRIMARY KEY AUTOINCREMENT,
         word TEXT,
         definition TEXT,
         updater TEXT);
        """, 0)
    db.execute("DROP TABLE IF EXISTS examples;", 0)
    db.execute(
        """CREATE TABLE examples
        (word_id INTEGER,
         example TEXT);
        """, 0)
    db.execute("DROP TABLE IF EXISTS updaters;", 0)
    db.execute(
        """CREATE TABLE updaters
        (author_id INTEGER PRIMARY KEY AUTOINCREMENT,
         city TEXT,
         date DATE);
        """, 0)
    db.commit()
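# A sketch of how these tables relate. The project's Database wrapper is only
# partially visible here, so the standard sqlite3 module is used for
# illustration; the table and column names match the schema above, and the
# database file name is hypothetical:
import sqlite3

conn = sqlite3.connect('dictionary.db')
cur = conn.cursor()
cur.execute("INSERT INTO word_info (word, definition, updater) VALUES (?, ?, ?)",
            ("corpus", "a structured collection of texts", "editor1"))
word_id = cur.lastrowid
# examples rows point back to word_info via the shared word_id
cur.execute("INSERT INTO examples (word_id, example) VALUES (?, ?)",
            (word_id, "The corpus contains many annotated documents."))
conn.commit()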
class DataFeed():
    def __init__(self):
        self.url = "wss://api2.poloniex.com"
        self.product_ids = POLO_PRODUCT_IDS
        self.product_codes = {}
        self.order_books = {x: {} for x in self.product_ids}
        self.inside_order_books = {
            x: {"bids": {}, "asks": {}} for x in self.product_ids
        }
        self.last_trade_ids = {x: None for x in self.product_ids}
        self.db = Database(DATABASE['POLO'], migrate=True)
        self.ws = websocket.WebSocketApp(
            self.url,
            on_message=self.on_message,
            on_error=self.on_error,
            on_open=self.on_open,
        )

    def on_message(self, ws, msg):
        msg = json.loads(msg)
        if len(msg) < 3:
            # heartbeat frames like [1010] carry no payload
            return
        for message in msg[2]:
            if message[0] == 'i':
                # Full order-book snapshot for one currency pair.
                print("got ob snapshot")
                all_bids = message[1]['orderBook'][1]
                all_asks = message[1]['orderBook'][0]
                final_all_bids = [
                    [x, all_bids[x]]
                    for x in sorted(all_bids, key=sorting_key, reverse=True)
                ]
                final_all_asks = [
                    [x, all_asks[x]] for x in sorted(all_asks, key=sorting_key)
                ]
                self.order_books[message[1]['currencyPair']] = {
                    'bids': final_all_bids,
                    'asks': final_all_asks
                }
                # Remember which numeric channel id maps to this pair.
                self.product_codes[msg[0]] = message[1]['currencyPair']
            elif message[0] == 'o':
                # Order-book delta: ['o', side (1=bid/0=ask), price, size].
                product_id = self.product_codes[msg[0]]
                change_side = 'bids' if message[1] == 1 else 'asks'
                orders = self.order_books[product_id][change_side]
                level_index = [
                    i for i, order in enumerate(orders)
                    if float(order[0]) == float(message[2])
                ]
                if level_index:
                    if float(message[3]) != 0:
                        orders[min(level_index)][1] = message[3]
                    else:
                        orders.pop(min(level_index))
                else:
                    # message[2] is the price level (message[1] is the side flag).
                    if change_side == 'bids':
                        insert_indexes = [
                            i for i, order in enumerate(orders)
                            if float(order[0]) >= float(message[2])
                        ]
                    else:
                        insert_indexes = [
                            i for i, order in enumerate(orders)
                            if float(order[0]) <= float(message[2])
                        ]
                    insert_index = max(insert_indexes) if insert_indexes else -1
                    orders.insert(insert_index + 1, [message[2], message[3]])
                inside_bids = {
                    'bids_' + str(x + 1):
                    "@".join(self.order_books[product_id]['bids'][x][::-1])
                    for x in range(15)
                }
                inside_asks = {
                    'asks_' + str(x + 1):
                    "@".join(self.order_books[product_id]['asks'][x][::-1])
                    for x in range(15)
                }
                inside_order_book = {"bids": inside_bids, "asks": inside_asks}
                if self.inside_order_books[product_id] != inside_order_book:
                    row = {
                        "server_datetime":
                        datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f%Z"),
                        "product_id": product_id
                    }
                    row.update(inside_bids)
                    row.update(inside_asks)
                    self.db.insert_into("polo_order_book", row)
                    self.inside_order_books[product_id] = inside_order_book
                    print(row)
            elif message[0] == 't':
                # Trade: ["t", tradeId, 0/1 (sell/buy), price, amount, timestamp]
                # e.g. ["t", "9394200", 1, "5545.00000000", "0.00009541", 1508060546]
                product_id = self.product_codes[msg[0]]
                trades = [{
                    "server_datetime":
                    datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f%Z"),
                    "exchange_datetime":
                    datetime.datetime.fromtimestamp(
                        message[5]).strftime("%Y-%m-%dT%H:%M:%S.%f%Z"),
                    "sequence": msg[1],
                    "trade_id": message[1],
                    "product_id": product_id,
                    'price': message[3],
                    'volume': message[4],
                    'side': 'sell' if message[2] == 0 else 'buy',
                    'backfilled': 'False'
                }]
                current_trade_id = int(message[1])
                if self.last_trade_ids[product_id]:
                    last_trade_id = int(self.last_trade_ids[product_id])
                else:
                    last_trade_id = current_trade_id
                self.last_trade_ids[product_id] = message[1]
                if current_trade_id > (last_trade_id + 1):
                    # Unlike the GDAX feed, gaps are only reported, not backfilled.
                    missing_trade_ids = list(
                        range(last_trade_id + 1, current_trade_id))
                    print("missed the following trades: " + str(missing_trade_ids))
                for trade in trades:
                    self.db.insert_into("polo_trades", trade)
                    print(trade)

    def on_error(self, ws, error):
        print(error)

    def on_open(self, ws):
        # Poloniex expects one subscribe command per channel/pair.
        for x in self.product_ids:
            ws.send(json.dumps({'command': 'subscribe', 'channel': x}))

    def run(self):
        try:
            self.ws.run_forever()
        except KeyboardInterrupt:
            sys.exit()
        except Exception:
            pass
def collect_full_data(arr):
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)

    if s == '0000':
        return [], 0, 0

    if s == '0001':
        # Error tag only: query the annotation table directly.
        req_template = '''FROM annotator_annotation
            LEFT JOIN annotator_sentence
            ON annotator_annotation.document_id = annotator_sentence.id WHERE '''
        req_template += parse_gram(err, 'tag')
        if flag:
            req_template += 'AND doc_id_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT document_id)' + req_template
        req1 = 'SELECT DISTINCT document_id' + req_template
        req = 'SELECT DISTINCT document_id, start, end' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id_id)' + req_template
    else:
        # The remaining fourteen cases differ only in which filters are appended
        # and in whether the error-annotation join is needed, so the template is
        # built from the bincode bits instead of spelling out each branch. The
        # parse_lex/parse_gram helpers are assumed to return 'AND ...' fragments,
        # as the original WHERE-1 concatenations imply; clause order within the
        # conjunction does not affect the result set.
        has_word, has_lex, has_gram, has_err = (c == '1' for c in s)
        if has_err:
            req_template = '''FROM annotator_token
                LEFT JOIN annotator_morphology
                ON annotator_token.id = annotator_morphology.token_id
                LEFT JOIN annotator_annotation
                ON annotator_token.sent_id = annotator_annotation.document_id
                WHERE 1 '''
        else:
            req_template = '''FROM annotator_morphology
                LEFT JOIN annotator_token
                ON annotator_token.id = annotator_morphology.token_id
                WHERE 1 '''
        if has_word:
            req_template += 'AND lem="%s" ' % word
        if has_err:
            req_template += ('AND num >= annotator_annotation.start '
                             'AND num <= annotator_annotation.end '
                             + parse_gram(err, 'tag') + ' ')
        if has_lex:
            req_template += parse_lex(lex) + ' '
        if has_gram:
            req_template += parse_gram(gram, 'gram') + ' '
        if flag:
            req_template += 'AND doc_id IN (' + ','.join(docs) + ')'
        n_req = 'SELECT COUNT(DISTINCT sent_id)' + req_template
        req = 'SELECT DISTINCT sent_id, num' + req_template
        req1 = 'SELECT DISTINCT sent_id' + req_template
        d_req = 'SELECT COUNT(DISTINCT doc_id)' + req_template

    rows = db.execute(req)
    # sent_num = int(db.execute(n_req)[0][0])
    # d_num = int(db.execute(d_req)[0][0])
    return rows, 0, 0
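# bincode is not defined in this excerpt; given how its result is matched
# ('0000' through '1111' over word, lex, gram, err), it presumably encodes
# which of the four query fields are non-empty. A hypothetical implementation
# consistent with that usage:
def bincode(word, lex, gram, err):
    """Return a 4-character bitmask, e.g. '1010' for word+gram queries."""
    return ''.join('1' if field else '0' for field in (word, lex, gram, err))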
    def __init__(self, lang):
        self.lang = lang
        self.db_manager = Database.get_instance()
        InlineQueryHandler.__init__(self, self.inline_query, pass_chat_data=True)