def remove_cattributes(self, bot, update, args):
    """Remove a cattribute (case-insensitive) from the user's Attribute rows.

    Replies with usage help when no argument is given, an error when the
    cattribute is not found, and a confirmation on successful deletion.
    Unexpected errors are reported to the configured error channel.
    """
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            try:
                cattribute_to_remove = str.lower(args[0])
            except IndexError:
                # /rmcattributes was invoked with no argument.
                reply_message = "Can't remove an empty string! Please use the following format: /rmcattributes cattributename"
                update.message.reply_text(reply_message, parse_mode='HTML')
            else:
                cur.execute(
                    """SELECT * FROM Attribute WHERE telegram_id = %s AND LOWER(attribute_name) = LOWER(%s) """,
                    (uid, cattribute_to_remove,))
                if cur.rowcount == 0:
                    reply_message = "Can't find that cattribute! please use /listcattributes to show your cattributes"
                    update.message.reply_text(reply_message, parse_mode='HTML')
                else:
                    cur.execute(
                        """DELETE FROM Attribute WHERE telegram_id = %s AND LOWER (attribute_name) = LOWER(%s) """,
                        (uid, cattribute_to_remove,))
                    # BUG FIX: "sucessfully" -> "successfully" in the confirmation
                    # text; also dropped the redundant second str.lower() call
                    # (cattribute_to_remove is already lower-cased above).
                    reply_message = cattribute_to_remove + " has been successfully removed"
                    update.message.reply_text(reply_message, parse_mode='HTML')
    except Exception:
        # Forward the full traceback to the bot's error channel for debugging.
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def __init__(self, input_file, reuse):
    """Initialise translation state.

    Sets up an empty symbol scope, no active function/loop, an empty
    struct list, a token stream over *input_file*, and a register
    allocator configured by *reuse*.
    """
    self.structs = []
    self.scope = {}
    self.function = None
    self.loop = None
    self.allocator = Allocator(reuse)
    self.tokens = Tokens(input_file)
def process_multiple(log, do_fetch=True, do_parse=True, do_merge=True):
    """Fetch, parse and merge the invoice delta.

    Each phase can be toggled independently.  Returns False when the API
    fetch fails; True otherwise (including the "no new invoices" case,
    which short-circuits the parse/merge phases).
    """
    root = config["data-dir"]
    if do_fetch:
        api = API(Tokens(), log)
        util.delete_files(root + '/processing/invoices', '*.json')
        success, invoice_cnt = api.fetch_invoice_details(hours_delta=30, tz_offset=7)
        if not success:
            log.write(
                "ERROR api invoices extraction failed {:,} invoices saved to : {}"
                .format(invoice_cnt, '/processing/invoices'))
            return False
        if invoice_cnt == 0:
            # Nothing new/updated in the refresh window -- done.
            log.write(
                "INFO api no invoices extracted (no new/updated invoices in refresh period)"
            )
            return True
        log.write(
            "INFO api invoices extraction succeeded {:,} invoices saved to : {}"
            .format(invoice_cnt, '/processing/invoices'))
    if do_parse:
        util.delete_files(root + '/processing/invoices', '*.csv')
        Parser(log).parse('invoices-line-items')
    if do_merge:
        Merger(log).merge_invoice_delta()
    return True
def process_single(log, do_fetch=True, do_parse=True):
    """Fetch and parse the non-delta datasets (items, themes, contacts,
    invoices).  Returns False as soon as any API fetch fails."""
    root = config["data-dir"]
    if do_fetch:
        api = API(Tokens(), log)
        util.delete_files(root + '/processing/default', '*.json')
        # Fetch each endpoint in order; abort on the first failure.
        for endpoint in ("items", "branding-themes", "contacts", "invoices"):
            if not api.fetch_data(endpoint):
                return False
    if do_parse:
        util.delete_files(root + '/processing/default', '*.csv')
        parser = Parser(log)
        for dataset in ('branding-themes', 'items', 'contacts', 'invoices'):
            parser.parse(dataset)
    return True
def add_cattributes(self, bot, update, args):
    """Insert a (telegram_id, cattribute) row unless the user already has it.

    Replies with usage help when no argument is supplied, and confirms
    (or rejects a duplicate) otherwise.  Unexpected errors are reported
    to the configured error channel.
    """
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            try:
                new_cattribute = str.lower(args[0])
            except IndexError:
                # /addcattributes was invoked with no argument.
                update.message.reply_text(
                    "Can't add an empty string! Please use the following format: /addcattributes cattributename",
                    parse_mode='HTML')
                return
            cur.execute(
                """SELECT * FROM Attribute WHERE telegram_id = %s AND LOWER(attribute_name) = LOWER(%s) """,
                (uid, new_cattribute,))
            if cur.rowcount > 0:
                update.message.reply_text("This cattribute is already added!", parse_mode='HTML')
            else:
                cur.execute("""INSERT INTO Attribute VALUES(%s,%s)""", (uid, new_cattribute))
                update.message.reply_text(
                    str.lower(new_cattribute) + " has been added to the table",
                    parse_mode='HTML')
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def retrieval_cosine(self, query):
    """Rank documents against *query* with cosine similarity.

    Returns a list of (doc_id, score) pairs sorted by descending score,
    with each score rounded to two decimals.
    """
    tokens = Tokens()
    query_terms = tokens.edit_query(query)
    # Raw term counts -> tf weights for the query vector.
    query_weights = dict(collections.Counter(query_terms).items())
    for term in query_weights:
        query_weights[term] = tf(query_weights[term])
    # Accumulate per-document dot products over the postings of each term.
    # NOTE(review): query_weights[term] was already tf-transformed above, so
    # tf() is applied to it a second time here -- confirm this is intended.
    scores = dict()
    for term in query_weights:
        for posting in self.L(term):
            doc_id, doc_freq = posting[0], posting[1]
            contribution = tf(doc_freq) * tf(query_weights[term])
            scores[doc_id] = scores.get(doc_id, 0) + contribution
    # Normalise by document norm * query norm.
    query_norm = self.get_query_norms(query_weights)
    for doc_id in scores:
        doc_norm = self.inverted_index.get_norms(str(doc_id))
        scores[doc_id] = round(scores[doc_id] / (doc_norm * query_norm), 2)
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
def find_venue_boundary_tokens(self):
    """Count tokens that start a venue (label 4, 'VN') span in each segment.

    Walks every (raw segment, observation sequence, label sequence) triple,
    records the lower-cased FIRST token of each VN run, prints the counts,
    and returns the {token: count} dict.

    NOTE: Python 2 code (``has_key``, ``iteritems``, ``print`` statements).
    """
    recorder = {}
    for raw_segment, observation_sequence, label_sequence in zip(
            self.raw_segments, self.observation_sequences, self.label_sequences):
        # True while we have not yet seen the first VN token of the
        # current venue run.
        first_target_label_flag = True
        tokens = Tokens(raw_segment).tokens
        for token, feature_vector, label in zip(tokens, observation_sequence, label_sequence):
            # First meet a VN label
            if label == 4 and first_target_label_flag:
                key = token.lower()
                # islower() is False for strings with no cased characters,
                # so pure punctuation/digit tokens are skipped.
                if not key.islower():
                    continue
                if recorder.has_key(key):
                    recorder[key] += 1
                else:
                    recorder[key] = 1
                first_target_label_flag = False
            elif (first_target_label_flag is False) and label in [0, 1, 3]:
                # Left the venue span (author/title label) -- re-arm the
                # flag so the next VN token counts as a new boundary.
                first_target_label_flag = True
    for k, v in recorder.iteritems():
        print k, '\t', v
    return recorder
def main(wordCorpus):
    """Render one word-cloud JPEG per class group of *wordCorpus*.

    Supported corpora: 'twenty-news' (20 groups) and 'acl-imdb' (2 groups).
    Output images land in ./results/<className>.jpg.
    """
    min_df = 2
    tokenType = 'stopped'
    if (wordCorpus == 'twenty-news'):
        groupIndices = list(range(20))
    elif (wordCorpus == 'acl-imdb'):
        groupIndices = [0, 1]
    nClusters = len(groupIndices)
    for groupIndex in groupIndices:
        tokensLists, className = Tokens(wordCorpus).getTokens(tokenType, groupIndex)
        # Flatten the per-document token lists into one big string.
        text = ' '.join(token for tokensList in tokensLists for token in tokensList)
        wordcloud = WordCloud(max_font_size=40,
                              width=600,
                              height=400,
                              background_color='white',
                              max_words=200,
                              relative_scaling=1.0).generate_from_text(text)
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        wordcloud.to_file('./results/' + className + '.jpg')
def decode(self, segment):
    """Decode *segment* with the full HMM and split its tokens into fields.

    Appends the raw segment, observation sequence and decoded labels to
    the instance history, then buckets each token by its decoded label:
    0/1 -> author, 3 -> title, 4 -> venue, 5 -> year (2 is a delimiter
    and is dropped).  Returns (authors, title, venue, years) where the
    first three are space-joined strings and years is de-duplicated.
    """
    # print segment
    observation_sequence, decoded_sequence = self.HMMentire.decode(segment)
    self.observations_raw.append(segment)
    self.observation_sequences.append(observation_sequence)
    self.labels.append(decoded_sequence)
    raw_tokens = Tokens(segment).tokens
    author_field, title_field, venue_field, year_field = [], [], [], []
    for token, label in zip(raw_tokens, decoded_sequence):
        if label in [0, 1]:
            author_field.append(token)
        elif label == 3:
            title_field.append(token)
        elif label == 4:
            venue_field.append(token)
        elif label == 5:
            year_field.append(token)
        # label 2 (delimiter) is intentionally skipped
    return (' '.join(author_field), ' '.join(title_field),
            ' '.join(venue_field), list(set(year_field)))
def __init__(self, record):
    """Build per-token author features for *record*.

    Tokenises the record, loads the name lexicon, and runs the feature
    pipeline via self.build().
    """
    super(AuthorFeatureBuilder, self).__init__()
    self.record = record
    self.tokens = Tokens(record).tokens
    self.num_tokens = len(self.tokens)
    self.features = None  # list of list of features for every name; e.g. [[1,1,1,1],[...], ...]
    self.NUM_REGEX = re.compile('\d')
    self.DELIMITERS = [
        ',',
        '.',
        ';',
    ]
    # BUG FIX: use a context manager so the lexicon file handle is closed
    # (the original open(...) was never closed).
    with open('data/name.lst', 'r') as name_file:
        self.NAME_LIST = [item.strip() for item in name_file.readlines()]
    # Feature-method names, invoked in order by build().
    self.pipeline = [
        'f_is_capitalized',
        'f_is_all_upper',
        'f_is_english',
        'f_is_punctuation',
        'f_is_sequential_punctuation',
        'f_has_digit',
        'f_is_all_digit',
        'f_is_in_namelist',
        'f_is_fname_abbrev',
        'f_is_preceeded_by_delimiter',
        'f_is_followed_by_delimiter',
        'f_is_an_and_between_two_names',
    ]
    self.build()
def decode_without_constraints(self, segment):
    """Decode *segment* without transition constraints and pretty-print
    each (feature vector, mnemonic label, token) triple for inspection.

    Also appends the segment, observations and labels to the instance
    history.  NOTE: Python 2 code (``print`` statements).
    """
    print segment
    observation_sequence, decoded_sequence = self.HMMentire.decode_without_constraints(
        segment)
    self.observations_raw.append(segment)
    self.observation_sequences.append(observation_sequence)
    self.labels.append(decoded_sequence)
    for vector, decoding, token in zip(observation_sequence, decoded_sequence,
                                       Tokens(segment).tokens):
        # Map the numeric label to its mnemonic: first name, last name,
        # delimiter, title, venue, year.
        if decoding == 0:
            label = 'FN'
        elif decoding == 1:
            label = 'LN'
        elif decoding == 2:
            label = 'DL'
        elif decoding == 3:
            label = 'TI'
        elif decoding == 4:
            label = 'VN'
        elif decoding == 5:
            label = 'YR'
        else:
            # Unexpected label value -- flag it in the output.
            label = str(decoding) + ', PROBLEM'
        print vector, '\t', label, '\t', token
    print '\n\n'
def translate(self, readline, result=None, no_imports=None):
    """Tokenise the stream from *readline* and return the translated tokens.

    The order of the phases below matters: imports first, then the token
    walk, then the describe-attribute / wrapped-setup / method-name
    fix-ups appended at the bottom.
    """
    # Tracker to keep track of information as the file is processed
    self.tokens = Tokens(self.default_kls)
    self.tracker = Tracker(result, self.tokens, self.wrapped_setup)
    # Add import stuff at the top of the file
    # (skipped only when no_imports is literally True).
    if self.import_tokens and no_imports is not True:
        self.tracker.add_tokens(self.import_tokens)
    # Looking at all the tokens
    with self.tracker.add_phase() as tracker:
        for tokenum, value, (_, scol), _, _ in generate_tokens(readline):
            self.tracker.next_token(tokenum, value, scol)
    # Add attributes to our Describes so that the plugin can handle some nesting issues
    # Where we have tests in upper level describes being run in lower level describes
    if self.with_describe_attrs:
        self.tracker.add_tokens(self.tracker.make_describe_attrs())
    # If setups should be wrapped, then do this at the bottom
    if self.wrapped_setup:
        self.tracker.add_tokens(self.tracker.wrapped_setups())
    # Add lines to bottom of file to add __testname__ attributes
    self.tracker.add_tokens(self.tracker.make_method_names())
    # Return translated list of tokens
    return self.tracker.result
def __init__(self):
    """Create a praw Reddit client from the stored credential tokens."""
    credentials = Tokens()
    self.reddit = praw.Reddit(client_id=credentials['client_id'],
                              client_secret=credentials['client_secret'],
                              user_agent='my-user-agent',
                              username=credentials['username'],
                              password=credentials['password'])
def get_prev_token(self):
    """Step the cursor back one token (two code cells) and re-tokenise it.

    Raises:
        InterpreterException: if the re-parsed token is INVALID.
    """
    self.pos -= 2
    pair = self.code[self.pos] + " " + self.code[self.pos + 1]
    tok = Tokens(pair)
    if tok.type == TokensType.INVALID:
        raise InterpreterException(f"Invalid token at position : {self.pos}")
    return tok
def set_index_dic(self, my_tweets_dic):
    """Build and return the inverted index, sorted by term.

    NOTE(review): the *my_tweets_dic* parameter is ignored -- the method
    reads self.my_tweets_dic instead.  Confirm callers always pass the
    same dict before changing this.
    """
    for tweet_id in self.my_tweets_dic:
        tokens = Tokens()
        # Strip stopwords from this tweet, then index what remains.
        tokens.remove_stopwords(self.my_tweets_dic[tweet_id])
        for word in tokens.reduced_tokens:
            self.replace_in_dic(word, tweet_id)
    return dict(sorted(self.dic.items()))
def getX(wordCorpus, tokenType, groupIndices):
    """Concatenate the token lists of every group in *groupIndices*.

    Returns (XAll, indexList) where XAll is an object ndarray of per-doc
    token arrays and indexList maps class name -> {'start', 'end'} row
    ranges into XAll.
    """
    XAll = []
    indexList = {}
    start = 0
    for groupIndex in groupIndices:
        X, className = Tokens(wordCorpus).getTokens(tokenType, groupIndex)
        end = start + len(X)
        indexList[className] = {'start': start, 'end': end}
        XAll.extend(X)
        start = end
    # rows: Docs. columns: words
    XAll = np.array([np.array(xi) for xi in XAll])
    return XAll, indexList
def cancel(self, bot, update):
    """Abort the conversation: delete the user's User and Attribute rows
    and end the ConversationHandler."""
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            update.message.reply_text(
                "And here I was, thinking we could be friends :(",
                parse_mode='HTML')
            cur.execute("""DELETE FROM User WHERE telegram_id = %s""", (uid,))
            cur.execute("""DELETE FROM Attribute WHERE telegram_id = %s""", (uid,))
            return ConversationHandler.END
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def forget(self, bot, update):
    """Delete a registered user's User and Attribute rows, if they exist."""
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            cur.execute("""SELECT * FROM User WHERE telegram_id = %s""", (uid,))
            if cur.rowcount == 0:
                update.message.reply_text("Can't delete what doesn't exist, man",
                                          parse_mode='HTML')
            else:
                cur.execute("""DELETE FROM User WHERE telegram_id = %s""", (uid,))
                cur.execute("""DELETE FROM Attribute WHERE telegram_id = %s""", (uid,))
                update.message.reply_text(
                    "Oh, I'll tell you all about it when I see you again",
                    parse_mode='HTML')
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def list_cattributes(self, bot, update):
    """Reply with a comma-separated list of the user's stored cattributes."""
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            cur.execute("""SELECT * FROM Attribute WHERE telegram_id = %s""", (uid,))
            if cur.rowcount == 0:
                update.message.reply_text("You have no cattributes listed!",
                                          parse_mode='HTML')
            else:
                # Column 1 of each Attribute row holds the cattribute name.
                catlist = [row[1] for row in cur.fetchall()]
                message = 'Your current cattributes are: ' + ", ".join(catlist)
                update.message.reply_text(message, parse_mode='HTML')
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def token_interface():
    """Token request view.

    GET: render the request form with all departments and streams.
    POST: persist a new Tokens row for the submitted phone/department/
    stream and render the generated-token confirmation page.
    """
    if request.method == 'POST':
        phone_number = request.form['phone_number']
        dept_id = request.form['departments']
        stream_id = request.form['streams']
        token_day_number = _get_current_token()
        attending = 1
        token = Tokens(token_day_number=token_day_number,
                       phone_number=phone_number,
                       department=dept_id,
                       stream=stream_id)
        db.session.add(token)
        db.session.commit()
        # Resolve display names for the confirmation page.
        dept_name = Departments.query.filter_by(id=dept_id).first().name
        stream_name = Streams.query.filter_by(id=stream_id).first().name
        return render_template('generated_token.html',
                               dept_name=dept_name,
                               token_number=token_day_number,
                               attending=attending,
                               token=token,
                               stream_name=stream_name)
    return render_template('token_interface.html',
                           departments=Departments.query.all(),
                           streams=Streams.query.all())
def run(self):
    """Re-decode every raw segment with the new HMM and compare labels.

    For each segment: decode with hmm_new, print old vs new label per
    token, and feed (feature vector, OLD label, token) into
    self.feature_entity_list.  NOTE: Python 2 code (``print`` statements).
    """
    i = 0
    self.new_labels = []
    for raw_segment, label_sequence in zip(self.raw_segments, self.label_sequences):
        # decode() returns (observations, labels); keep only the labels.
        new_labels = self.hmm_new.decode(raw_segment)[1]
        self.new_labels.append(new_labels)
        tokens = Tokens(raw_segment).tokens
        feature_vectors = FeatureGenerator(raw_segment).features
        print i, ': ', raw_segment
        for token, old_label, new_label, feature_vector in zip(
                tokens, label_sequence, new_labels, feature_vectors):
            print to_label(old_label), '\t', to_label(
                new_label), '\t', token
            self.feature_entity_list.add_entity(
                feature_vector, old_label, token)  #???? Old label first
        print '\n'
        i += 1
def getX(wordCorpus, tokenType, listOfClasses):
    """Concatenate token lists for a comma-separated class list.

    *listOfClasses* is split on ',' to obtain group indices.  Returns
    (XAll, indexList) where indexList maps class name -> {'start','end'}
    row ranges; per-class counts and ranges are logged.
    """
    XAll = []
    indexList = {}
    start = 0
    for groupIndex in listOfClasses.split(','):
        X, className = Tokens(wordCorpus).getTokens(tokenType, groupIndex)
        end = start + len(X)
        indexList[className] = {'start': start, 'end': end}
        logger.info('True Group Index {}, classname: {}'.format(
            groupIndex, className))
        logger.info('Count {}, start - End Indices {} , {}'.format(
            len(X), start, end))
        XAll.extend(X)
        start = end
    # rows: Docs. columns: words
    XAll = np.array([np.array(xi) for xi in XAll])
    logger.info('indexList{}'.format(indexList))
    return XAll, indexList
def Cryptokitties():
    """Wire up all Telegram handlers and run the bot.

    Registers the /register conversation, the stand-alone command
    handlers, and the repeating 10-minute broadcast job, then starts
    polling and blocks until shutdown.
    """
    print("Cryptokitties online")
    updater = Updater(token=Tokens().bot_token())
    dispatcher = updater.dispatcher
    # registering for users to a database.
    conv_handler = ConversationHandler(
        entry_points=[CommandHandler('register', Commands().register)],
        states={
            GENERATION: [MessageHandler(Filters.text, Commands().generation)],
            COOLDOWN: [MessageHandler(Filters.text, Commands().cooldown)],
            OFFSTART: [MessageHandler(Filters.text, Commands().offstart)],
            OFFEND: [MessageHandler(Filters.text, Commands().offend)],
            ATTLIST: [MessageHandler(Filters.text, Commands().attribute_list)]
        },
        fallbacks=[CommandHandler('cancel', Commands().cancel)],
        per_user='******')
    dispatcher.add_handler(conv_handler, 1)
    # Stand-alone commands.
    dispatcher.add_handler(CommandHandler('forget', Commands().forget))
    dispatcher.add_handler(CommandHandler('alert', Commands().alert))
    dispatcher.add_handler(CommandHandler('listcattributes', Commands().list_cattributes))
    dispatcher.add_handler(CommandHandler('rmcattributes', Commands().remove_cattributes,
                                          pass_args=True))
    dispatcher.add_handler(CommandHandler('addcattributes', Commands().add_cattributes,
                                          pass_args=True))
    ########################################################
    # Alert jobs
    ########################################################
    j = updater.job_queue
    job_minute = j.run_repeating(Commands().user_broadcast, 600, 0)
    updater.start_polling()
    # BUG FIX: the original evaluated `updater.idle` without calling it, so
    # the function returned immediately instead of blocking until shutdown.
    updater.idle()
def refresh_tokens(tokens: Tokens) -> Tokens:
    """Exchange *tokens*' refresh_token for a fresh access/refresh pair.

    On a non-200 response the error is logged and the original *tokens*
    object is returned unchanged.
    """
    logging.debug("Try to refresh tokens")
    payload = {
        "grant_type": "refresh_token",
        "refresh_token": tokens.refresh_token
    }
    response = requests.post(url=URI_HH_OAUTH_TOKEN,
                             headers=make_auth_http_headers(),
                             data=payload)
    logging.info(response.text)
    if response.status_code != 200:
        logging.error("status_code: %s; response: %s", response.status_code,
                      response.text)
        return tokens
    js = response.json()
    fresh = Tokens(access_token=js["access_token"],
                   refresh_token=js["refresh_token"])
    logging.debug("Tokens: %s", fresh)
    return fresh
def offend(self, bot, update):
    """Conversation step: store the user's offset_end, then ask for
    cattributes.  Re-prompts (state OFFEND) on a non-integer reply."""
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            try:
                int(update.message.text)  # validate: reply must be an integer
            except ValueError:
                update.message.reply_text("Please send me an integer :)",
                                          parse_mode='HTML')
                return OFFEND
            cur.execute("""UPDATE User SET offset_end = %s WHERE telegram_id = %s""",
                        (update.message.text, uid,))
            message = "Thank you, now, please key in a cattribute(one cattribute at a time only!)"
            message += "This bot will match the cattributes you are looking for"
            update.message.reply_text(message, parse_mode='HTML')
            return ATTLIST
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def attribute_list(self, bot, update):
    """Conversation step: collect cattributes one message at a time.

    A reply of "end" (case-insensitive) finishes registration and ends
    the conversation; anything else is inserted as an Attribute row and
    the state stays at ATTLIST.
    """
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            if str.lower(update.message.text) == "end":
                message = "Thanks for registering =)\n"
                # BUG FIX: this line previously used `=` instead of `+=`,
                # overwriting the "Thanks for registering" line above.
                message += "If you want to toggle 10 minute scans, please do a /alert"
                update.message.reply_text(message, parse_mode='HTML')
                return ConversationHandler.END
            cur.execute("""INSERT INTO Attribute VALUES(%s,%s)""",
                        (uid, update.message.text,))
            message = update.message.text
            message += " has been added as an attribute. Please enter the next attribute \n"
            message += "If you're done with adding your cattributes, please reply with end"
            update.message.reply_text(message, parse_mode='HTML')
            return ATTLIST
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def cooldown(self, bot, update):
    """Conversation step: store the user's cooldown_index, then ask for
    the offset start.  Re-prompts (state COOLDOWN) on a non-integer reply."""
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            try:
                int(update.message.text)  # validate: reply must be an integer
            except ValueError:
                update.message.reply_text("Please send me an integer :)",
                                          parse_mode='HTML')
                return COOLDOWN
            cur.execute("""UPDATE User SET cooldown_index = %s WHERE telegram_id = %s""",
                        (update.message.text, uid,))
            message = "Fantastic. Now, may I please have the offset starting point?"
            message += "This bot will scan the api starting at the offset given. We recomend starting at 0"
            update.message.reply_text(message, parse_mode='HTML')
            return OFFSTART
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def generation(self, bot, update):
    """Conversation step: store the user's generation_index, then ask for
    the cooldown index.  Re-prompts (state GENERATION) on a non-integer
    reply."""
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            try:
                int(update.message.text)  # validate: reply must be an integer
            except ValueError:
                update.message.reply_text("Please send me an integer :)",
                                          parse_mode='HTML')
                return GENERATION
            cur.execute("""UPDATE User SET generation_index = %s WHERE telegram_id = %s""",
                        (update.message.text, uid,))
            message = "Fantastic. Now, may I please have a cooldown index?"
            message += "This bot will scan for the cooldown index less than the number that you input"
            update.message.reply_text(message, parse_mode='HTML')
            return COOLDOWN
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')
def __init__(self, record):
    """Build per-token venue features for *record*.

    Tokenises the record, loads the venue and ordinal lexicons, and runs
    the feature pipeline via self.build().
    """
    super(VenueFeatureBuilder, self).__init__()
    self.record = record
    self.tokens = Tokens(record).tokens
    self.num_tokens = len(self.tokens)
    self.features = None
    self.NUM_REGEX = re.compile('\d')
    # Matches letters+2/4 digits or 2/4 digits+letters (e.g. "ICML2004").
    self.CHAR_DIGIT_MIX_REGEX = re.compile(
        '((^[a-zA-Z]+\d{4}$)|(^[a-zA-Z]+\d{2}$))|((^\d{4}[a-zA-Z]+$)|(^\d{2}[a-zA-Z]+$))',
        re.MULTILINE)
    self.DELIMITERS = [
        ',',
        '.',
        ';',
    ]
    # BUG FIX: use context managers so both lexicon file handles are
    # closed (the originals were never closed).
    with open('data/venue.lst', 'r') as venue_file:
        self.VENUE_LIST = [item.strip() for item in venue_file.readlines()]
    with open('data/ordinal.lst', 'r') as ordinal_file:
        self.ORDINAL_LIST = [item.strip() for item in ordinal_file.readlines()]
    # Feature-method names, invoked in order by build().
    self.pipeline = [
        'f_is_capitalized',
        'f_is_all_upper',
        'f_is_english',
        'f_has_both_char_and_digit',
        'f_is_ordinal',
        'f_is_punctuation',
        'f_has_digit',
        'f_is_all_digit',
        'f_is_in_venuelist',
        'f_is_preceeded_by_delimiter',
        'f_is_followed_by_delimiter',
        'f_is_followed_by_year',
    ]
    self.build()
def register(self, bot, update):
    """Entry point of the /register conversation.

    Creates a blank User row and asks for the generation index, or ends
    the conversation if the user is already registered.
    """
    try:
        with closing(self.conn.cursor()) as cur:
            uid = update.message.from_user.id
            cur.execute("""SELECT telegram_id FROM User WHERE telegram_id = %s""", (uid,))
            if cur.rowcount != 0:
                update.message.reply_text(
                    "You are already registered in my database. To remove your details, do a /forget",
                    parse_mode='HTML')
                return ConversationHandler.END
            message = "Registering you in my database! \n"
            message += "Can I please have the generation index?\n"
            message += "This bot will search for the generation index less than or equals to the number you input\n"
            message += "If you feel threatened at any point of time, do a /cancel to abort this conversation."
            # Placeholder row; the remaining columns are filled in by the
            # later conversation steps.
            cur.execute("""INSERT INTO User VALUES(%s,NULL,NULL,NULL,NULL,'No')""", (uid,))
            update.message.reply_text(message, parse_mode='HTML')
            return GENERATION
    except Exception:
        catcherror = traceback.format_exc()
        bot.sendMessage(chat_id=Tokens().error_channel(), text=catcherror, parse_mode='HTML')