def _determine_income_lt(original_ld2):
    if get_key(original_ld2, 'income.income_less_than_five_thousand'):
        return True
    elif get_key(original_ld2, 'income.income_five_thousand_or_more'):
        return False
    else:
        return None
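# The LD-2 snippets here (see also _postprocess_ld2 below) call get_key(data, 'a.b.c') with a
# dotted path into nested dicts. The project's actual helper isn't shown; a minimal sketch under
# that assumption, for illustration only:
def get_key(data, dotted_path, default=None):
    """Walk a dotted path (e.g. 'income.income_less_than_five_thousand') through nested dicts."""
    current = data
    for part in dotted_path.split('.'):
        if not isinstance(current, dict) or part not in current:
            return default
        current = current[part]
    return current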
def get_avg_coll_rate():
    """
    Function to get the average number of collisions on an hourly/daily/monthly/yearly basis, as specified
    :return:
    """
    result = 0
    if num_coll_time[0] == utils.HOUR:
        for i in range(0, 24):
            key = utils.get_key(utils.TIME, city, utils.HOUR, i)
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / 24
    elif num_coll_time[0] == utils.DAY:
        for i in range(0, 7):
            key = utils.get_key(utils.TIME, city, utils.DAY, utils.get_day(i))
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / 7
    elif num_coll_time[0] == utils.MONTH:
        for i in range(1, 13):
            key = utils.get_key(utils.TIME, city, utils.MONTH, utils.get_month(i))
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / 12
    elif num_coll_time[0] == utils.YEAR:
        for i in range(utils.START_YEAR, utils.CURRENT_YEAR + 1):
            key = utils.get_key(utils.TIME, city, utils.YEAR, i)
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / (utils.CURRENT_YEAR - utils.START_YEAR + 1)
    else:
        raise ValueError('Invalid type of duration: ', num_coll_time[0])
    return result
def check_single_dup(ids_dict, row, local_clips_path):
    print(
        "------------------------------------------------------------------------"
    )
    print('UID 0:', row['uid_1'].to_string(index=False))
    print('UID 1:', row['uid_2'].to_string(index=False))
    print()
    path_0 = get_key(row['uid_1'].to_string(index=False).strip(), ids_dict)
    path_1 = get_key(row['uid_2'].to_string(index=False).strip(), ids_dict)
    print('Path 0:', path_0)
    print('Path 1:', path_1)
    print()
    print('IS PSFIX' if row['is_psfix'].to_string(
        index=False).strip() == '1' else 'NOT PSFIX')
    print()
    print('Name distance:', row['name_dist'].to_string(index=False))
    print()
    print('Sound distance:', row['sound_dist'].to_string(index=False))
    while True:
        play_songs(row['uid_1'].to_string(index=False).strip(),
                   row['uid_2'].to_string(index=False).strip(),
                   local_clips_path)
        keep = user_confirmation(
            "Enter 1 to keep one song (duplicates), 2 to keep both (not duplicates), or anything else to repeat"
        )
        if keep not in ['1', '2']:
            continue
        # TODO: change to automatic extraction of which song to keep
        if keep == '1':
            keep = longer_path(path_0, path_1)
        return int(keep)
def solve(problem, weight):
    start_node = Node(problem.start_state)
    start_node_key = get_key(start_node.state)
    start_node_value = start_node.path_cost + weight * problem.get_heuristic(
        start_node.state)
    open_list = OpenList()
    open_list.add(start_node, start_node_value)
    closed_set = set()
    best_value = float('inf')
    generated_states = set()
    generated_states.add(start_node_key)
    path_costs = {start_node_key: start_node.path_cost}
    while open_list:
        current_node = open_list.remove()
        current_node_value = current_node.path_cost + problem.get_heuristic(
            current_node.state)
        if current_node_value < best_value:
            current_node_key = get_key(current_node.state)
            closed_set.add(current_node_key)
            for child_node in get_children_nodes(problem, current_node):
                child_node_value = child_node.path_cost + problem.get_heuristic(
                    child_node.state)
                child_node_key = get_key(child_node.state)
                if child_node_value < best_value:
                    if problem.is_goal(child_node.state):
                        path_costs[child_node_key] = child_node.path_cost
                        best_value = child_node_value
                        solution = get_solution(child_node)
                        cost = len(solution)
                        print({'cost': cost, 'solution': solution})
                    elif child_node_key in closed_set or child_node in open_list:
                        if path_costs[child_node_key] > child_node.path_cost:
                            path_costs[child_node_key] = child_node.path_cost
                            value = path_costs[
                                child_node_key] + weight * problem.get_heuristic(
                                    child_node.state)
                            if child_node_key in closed_set:
                                closed_set.remove(child_node_key)
                            open_list.add(child_node, value)
                    else:
                        path_costs[child_node_key] = child_node.path_cost
                        value = path_costs[
                            child_node_key] + weight * problem.get_heuristic(
                                child_node.state)
                        open_list.add(child_node, value)
    return None
def detect_poss_duplicates(ids_dict,
                           params_path_data,
                           params_list_data,
                           n_neighbors=5):
    curr_params_data = unpack_params(params_path_data, params_list_data)
    load_from_data = 'small_dataset'
    for i in params_list_data:
        load_from_data += '_' + str(i)
    load_from_data += '.csv'
    load_from_data = os.path.join(params_path_data[-1], load_from_data)
    save_to = 'possible_duplicates'
    save_to += '_data_(' + str(params_list_data[0])
    for i in params_list_data[1:]:
        save_to += '_' + str(i)
    save_to += ').csv'
    print("Creating possible duplicates file...")
    print("Using dataset parameters: " + str(curr_params_data))
    print("Saving at: " + save_to)
    print("Loading dataset from: " + load_from_data)
    df_dups = pd.read_csv(load_from_data, names=['UniqueID', 'f1', 'f2'])
    matrix_2d = df_dups.values[:, 1:]
    neigh = NearestNeighbors(n_neighbors=n_neighbors)
    neigh.fit(matrix_2d)
    dist_mat, idx_mat = neigh.kneighbors(matrix_2d)
    df_dups['name'] = [
        os.path.split(get_key(df_dups.iloc[j, 0], ids_dict))[1]
        for j in range(idx_mat.shape[0])
    ]
    for i in range(n_neighbors):
        df_dups['uid_' + str(i)] = [df_dups.iloc[x, 0] for x in idx_mat[:, i]]
        df_dups['name_' + str(i)] = [
            os.path.split(get_key(df_dups.iloc[idx_mat[j, i], 0], ids_dict))[1]
            for j in range(idx_mat.shape[0])
        ]
        df_dups['sound_dist_' + str(i)] = dist_mat[:, i]
        df_dups['name_dist_' + str(i)] = [
            1 - jellyfish.jaro_winkler(df_dups['name'][j],
                                       df_dups['name_' + str(i)][j])
            for j in range(idx_mat.shape[0])
        ]
        df_dups['is_psfix_' + str(i)] = [
            is_psfix(df_dups['name'][j], df_dups['name_' + str(i)][j])
            for j in range(idx_mat.shape[0])
        ]
    to_drop = ['f1', 'f2']
    df_dups = df_dups.drop(to_drop, axis=1)
    df_dups.to_csv(save_to, index=False, header=True, encoding='utf-8')
def _select_key(self, line):
    """ Determine key-value split of items """
    for key in self.key_exceptions:
        if line.startswith(key):
            return key
    for key_length, items in self.key_length.items():
        for item in items:
            if line.startswith(item):
                return get_key(line, key_length)
    return get_key(line, len(line.split()) - 1)
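# _select_key calls get_key(line, n), and its fallback uses len(line.split()) - 1, which suggests
# the key is the first n whitespace-separated tokens of the line. A minimal sketch under that
# assumption (not the original helper):
def get_key(line, key_length):
    """Take the first `key_length` whitespace-separated tokens of `line` as the key."""
    return ' '.join(line.split()[:key_length])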
def update(self, obj, attributes=None):
    """ Make values from a given object available. """
    if attributes is None:
        attributes = []
    for attr in attributes:
        value = getattr(obj, attr)
        if callable(value):
            value = value()
        self.update_data[get_key(obj, attr)] = value
def transform(message, data):
    logging.info("Transforming: message=[%s] data=[%s]" % (message, data))
    t_data = data
    storage = message['storage']
    data_type = message['type']
    out_type = output_type(data_type, storage)
    logging.info("output type: %s" % out_type)
    if out_type == 'yaml':
        t_data = yaml.dump(data)
    elif out_type == 'json':
        t_data = json.dumps(data)
    message['app'] = message['name']
    message['data'] = t_data
    # flavor spring?
    if utils.has_key('spring.profiles', data):
        spring_profile = utils.get_key('spring.profiles', data)
        n_pattern = "%s/%s" if storage == 'vault' else "%s-%s"
        message['name'] = n_pattern % (message['name'], spring_profile)
    return message
def get_pairs(self, preclusters=None):
    """ Get all unique pairs of sequences in input_info, skipping where preclustered out """
    all_pairs = itertools.combinations(self.input_info.keys(), 2)
    if preclusters == None:
        print ' ?? lines (no preclustering)'  # % len(list(all_pairs)) NOTE I'm all paranoid the list conversion will be slow (although it doesn't seem to be a.t.m.)
        return all_pairs
    else:  # if we've already run preclustering, skip the pairs that we know aren't matches
        preclustered_pairs = []
        n_lines, n_preclustered, n_previously_preclustered, n_removable, n_singletons = 0, 0, 0, 0, 0
        for a_name, b_name in all_pairs:
            key = utils.get_key((a_name, b_name))
            # NOTE shouldn't need this any more:
            if a_name not in preclusters.query_clusters or b_name not in preclusters.query_clusters:  # singletons (i.e. they were already preclustered into their own group)
                n_singletons += 1
                continue
            if key not in preclusters.pairscores:  # preclustered out in a previous preclustering step
                n_previously_preclustered += 1
                continue
            if preclusters.query_clusters[a_name] != preclusters.query_clusters[b_name]:  # not in same cluster
                n_preclustered += 1
                continue
            if preclusters.is_removable(preclusters.pairscores[key]):  # in same cluster, but score (link) is long, i.e. *this* pair is far apart, but other seqs to which they are linked are close to each other
                n_removable += 1
                continue
            preclustered_pairs.append((a_name, b_name))
            n_lines += 1
        print ' %d lines (%d preclustered out, %d removable links, %d singletons, %d previously preclustered)' % (n_lines, n_preclustered, n_removable, n_singletons, n_previously_preclustered)
        return preclustered_pairs
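# get_pairs and the clustering routines below key each pair of ids with utils.get_key((a_name, b_name)),
# so the key has to be independent of argument order. The real implementation isn't shown; one plausible
# sketch, purely as an assumption:
def get_key(names):
    """Build an order-independent key for a pair of sequence ids (illustrative sketch)."""
    return '.'.join(sorted(str(name) for name in names))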
def song_subset(audio_path, server_subpaths='All', ids_dict=None):
    s_subset = []
    if server_subpaths == 'All':
        for root, dirs, files in os.walk(audio_path):
            for name in files:
                filedir = os.path.join(root, name)
                filename, file_extension = os.path.splitext(filedir)
                unique_id, _ = os.path.splitext(name)
                if file_extension in ['.wav']:
                    s_subset.append((unique_id, filedir))
        return set(s_subset)
    elif server_subpaths == 'FINAL_East African Popular Music Archive' and ids_dict is not None:
        for root, dirs, files in os.walk(audio_path):
            for name in files:
                filedir = os.path.join(root, name)
                filename, file_extension = os.path.splitext(filedir)
                unique_id, _ = os.path.splitext(name)
                key = get_key(unique_id, ids_dict)
                if "2_EastAfricanArchive" in key and "FINAL_East African Popular Music Archive" not in key:
                    continue
                elif file_extension in ['.wav']:
                    s_subset.append((unique_id, filedir))
        return set(s_subset)
    elif server_subpaths == 'Remove duplicates':
        pass
def find_extremum_time(begin, end, duration_type, extremum_type, dur_func):
    """
    Function to find the duration (of the given duration type) during which collisions are either minimum or maximum
    :param begin:
    :param end:
    :param duration_type:
    :param extremum_type:
    :param dur_func:
    :return:
    """
    int_max = sys.maxsize
    int_min = -sys.maxsize - 1
    result = 0
    if extremum_type == utils.DANGEROUS:
        result = int_min
    elif extremum_type == utils.SAFEST:
        result = int_max
    else:
        raise ValueError('Invalid type of extremum type: ', extremum_type)
    dur = dur_func(1)
    for i in range(begin, end):
        curr_dur = dur_func(i)
        key = utils.get_key(utils.TIME, city, duration_type, curr_dur)
        curr_val = int(r.get(key) if r.exists(key) else 0)
        if extremum_type == utils.DANGEROUS:
            if curr_val > result:
                result = curr_val
                dur = curr_dur
        else:
            if curr_val < result:
                result = curr_val
                dur = curr_dur
    return dur, result
def get_stat(value):
    clean_db(db)
    stats = {'lose': 0, 'victory': 0}
    if value == 'month':
        date = datetime.datetime.now()
        keys = db.prefix(str(date.month) + str(date.year))
        for key in keys:
            for key_ in db[key].keys():
                stats[key_] += db[key][key_]
        winrate = int(stats['victory']) / (int(stats['victory']) + int(stats['lose']))
        return f'''Your stats for this month:
 - Victories: {stats['victory']}
 - Losses: {stats['lose']}
 - Winrate: {round(winrate, 2) * 100} %
'''
    else:
        key = get_key()
        if key in db.keys():
            for key_ in db[key]:
                stats[key_] += db[key][key_]
            winrate = int(stats['victory']) / (int(stats['victory']) + int(stats['lose']))
            return f'''Your stats for this day:
 - Victories: {stats['victory']}
 - Losses: {stats['lose']}
 - Winrate: {round(winrate, 2) * 100} %
'''
        else:
            return 'You have no stats for today'
def _postprocess_ld2(transformed_ld2, original_ld2):
    _transformed_ld2 = transformed_ld2.copy()
    _transformed_ld2['report_type']['quarter'] = _determine_quarter(original_ld2)
    _transformed_ld2['expense_reporting_method'] = _determine_expense_method(original_ld2)
    if get_key(original_ld2, 'report_type.no_activity'):
        _transformed_ld2['lobbying_activities'] = []
    _transformed_ld2['expense_less_than_five_thousand'] = _determine_expense_lt(original_ld2)
    _transformed_ld2['income_less_than_five_thousand'] = _determine_income_lt(original_ld2)
    return _transformed_ld2
def add_value(value):
    key = get_key()
    if key in db.keys():
        stats = db[key]
        if value in stats.keys():
            stats[value] += 1
        else:
            stats[value] = 1
        db[key] = stats
    else:
        db[key] = {value: 1}
def get(self):
    args = self.reqparse.parse_args()
    token = args['token']
    register = game['register']
    for player in register:
        if register[player] == token:
            player_number = utils.get_key(register, token)
            win_number = game['wins'][player_number]
            return win_number
    return -1
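# This resource, the post handler, callback_inline, and kowalski_analyze below all call
# utils.get_key(mapping, value) as a reverse lookup: find the key whose value matches. The post
# handler checks for -1 on a miss, so a minimal sketch consistent with that (an assumption, not
# the project's actual utils code) might be:
def get_key(mapping, value):
    """Return the first key in `mapping` whose value equals `value`, or -1 if none does."""
    for key, stored_value in mapping.items():
        if stored_value == value:
            return key
    return -1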
def get_article_urls(query='', page=0):
    yielded = []
    url = 'https://www.hmetro.com.my/search?s={}{}'.format(
        query,
        '' if page == 0 or not isinstance(page, int) else '&page={}'.format(page))
    for url in filter(
            map(filter(open_soup(url).find_all('a'), has_key('href')),
                get_key('href')),
            text_match(r'^/.+?/\d{4}/\d{2}/\d{6}/.+$')):
        url = 'https://www.hmetro.com.my{}'.format(url)
        if url in yielded:
            continue
        yielded.append(url)
        yield page, url, get_article(open_soup(url))
def single_link(self, input_scores=None, infname=None, debug=False, reco_info=None, outfile=None):
    if infname is None:
        assert input_scores is not None
    else:
        assert input_scores is None  # should only specify <input_scores> *or* <infname>
        input_scores = []
        with opener('r')(infname) as infile:
            reader = csv.DictReader(infile)
            for line in reader:
                input_scores.append(line)
    sorted_lines = sorted(input_scores, key=lambda k: float(k['logprob']))
    for line in sorted_lines:
        a_name = line['id_a']
        b_name = line['id_b']
        score = float(line['logprob'])
        dbg_str_list = ['%22s %22s %8.3f' % (a_name, b_name, score), ]
        if reco_info is None:
            dbg_str_list[-1] += ' %s' % ('-')
        else:
            from_same_event = utils.from_same_event(reco_info, [a_name, b_name])
            dbg_str_list[-1] += ' %d' % (from_same_event)
        self.incorporate_into_clusters(a_name, b_name, score, dbg_str_list)
        self.pairscores[(utils.get_key((a_name, b_name)))] = score
        self.plotscores['all'].append(score)
        if reco_info is not None:
            if from_same_event:
                self.plotscores['same'].append(score)
            else:
                self.plotscores['diff'].append(score)
        if debug:
            outstr = ''.join(dbg_str_list)
            if outfile == None:
                print outstr
            else:
                outfile.write(outstr + '\n')
    for query, cluster_id in self.query_clusters.iteritems():
        if cluster_id not in self.id_clusters:
            self.id_clusters[cluster_id] = []
        self.id_clusters[cluster_id].append(query)
    for cluster_id, queries in self.id_clusters.items():
        if len(queries) == 1:
            self.singletons.append(queries[0])
    # print 'nearest', self.nearest_true_mate
    out_str_list = [' %d clusters:\n' % len(self.id_clusters), ]
    for cluster_id in self.id_clusters:
        out_str_list.append(' ' + ' '.join([str(x) for x in self.id_clusters[cluster_id]]) + '\n')
    if outfile == None:
        print ''.join(out_str_list)
    else:
        outfile.write(''.join(out_str_list))
def solve(problem):
    start_node = Node(problem.start_state)
    start_node_value = start_node.path_cost + problem.get_heuristic(
        start_node.state)
    open_list = OpenList()
    open_list.add(start_node, start_node_value)
    closed_list = set()
    while open_list:
        current_node = open_list.remove()
        if problem.is_goal(current_node.state):
            return get_solution(current_node)
        current_node_key = get_key(current_node.state)
        closed_list.add(current_node_key)
        for child_node in get_children_nodes(problem, current_node):
            child_node_key = get_key(child_node.state)
            if child_node_key not in closed_list and child_node not in open_list:
                child_node_value = child_node.path_cost + problem.get_heuristic(
                    child_node.state)
                open_list.add(child_node, child_node_value)
            elif child_node in open_list:
                stored_child_node = open_list[child_node]
                child_node_value = child_node.path_cost + problem.get_heuristic(
                    child_node.state)
                stored_child_node_value = stored_child_node.path_cost + problem.get_heuristic(
                    stored_child_node.state)
                if child_node_value < stored_child_node_value:
                    del open_list[stored_child_node]
                    open_list.add(child_node, child_node_value)
    return None
def linear_conflict(state, final_state):
    ret = 0
    size = len(state)
    keys = list(state.keys())
    for index, key_j in enumerate(keys):
        if index + 1 < size:
            key_k = keys[index + 1]
            if state[key_j] != 0 and state[key_k] != 0 and key_k[1] == key_j[1]:
                goal_key_j = get_key(final_state, state[key_j])
                goal_key_k = get_key(final_state, state[key_k])
                if key_j[1] == goal_key_j[1] and key_k[1] == goal_key_k[1] \
                        and state[key_j] > state[key_k]:
                    ret += 1
            key_k = (key_k[1], key_k[0])
            key_j = (key_j[1], key_j[0])
            if state[key_j] != 0 and state[key_k] != 0 and key_k[0] == key_j[0]:
                goal_key_j = get_key(final_state, state[key_j])
                goal_key_k = get_key(final_state, state[key_k])
                if key_j[0] == goal_key_j[0] and key_k[0] == goal_key_k[0] \
                        and state[key_j] > state[key_k]:
                    ret += 1
    return manhattan(state, final_state) + 2 * ret
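# linear_conflict treats a puzzle state as a dict mapping (row, col) tuples to tile values and
# relies on get_key(final_state, value) returning the goal coordinate of a tile. A minimal sketch
# of that lookup under those assumptions (illustrative only, not the original helper):
def get_key(state, value):
    """Return the (row, col) position holding `value` in the given state dict."""
    for position, tile in state.items():
        if tile == value:
            return position
    return None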
def get_num_coll_loc():
    """
    Function to get the number of collisions at a specified location (can be specified as off/on/cross street)
    :return:
    """
    result = 0
    if num_coll_loc[0] == utils.OFF_STREET:
        result = r.get(
            utils.get_key(utils.LOCATION, city, utils.OFF_STREET, num_coll_loc[1]))
    elif num_coll_loc[0] == utils.ON_STREET:
        result = r.get(
            utils.get_key(utils.LOCATION, city, utils.ON_STREET, num_coll_loc[1]))
    elif num_coll_loc[0] == utils.CROSS_STREET:
        result = r.get(
            utils.get_key(utils.LOCATION, city, utils.CROSS_STREET, num_coll_loc[1]))
    else:
        raise ValueError('Invalid type of location: ', num_coll_loc[0])
    return result
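# The collision-stats helpers (get_avg_coll_rate, find_extremum_time, get_num_coll_loc,
# get_num_coll_time) all build Redis keys via utils.get_key(category, city, subtype, value). The
# actual key scheme isn't shown; one plausible sketch, purely as an assumption, is a
# colon-delimited composite key:
def get_key(*parts):
    """Join key components into a single colon-delimited Redis key, e.g. 'TIME:<city>:HOUR:13'."""
    return ':'.join(str(part) for part in parts)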
def main(self):
    # get the api key
    self.api_key = get_key()
    # initialize the data file
    self.init_data_file()
    # initialize the addresses
    self.init_addresses()
    # initialize the bloom filter
    self.init_bf()
    # open the connection and start receiving data
    asyncio.get_event_loop().run_until_complete(self.listen())
    # keep listening
    asyncio.get_event_loop().run_forever()
    logger.info('main end')
def update(word: str, lives: int, guessed: List[str]) -> int:
    paint(guessed)
    key = get_key()
    if key.isalpha():
        positions = find_all_elements(word, key)
        if not positions:
            lives = lost_life(lives)
        else:
            for i in positions:
                guessed[i] = key
    clear_screen()
    return lives
def get():
    key = get_key()
    result = process_custom_command(key)
    if result is False:
        if storage_backend.has_key(key):
            result = storage_backend.get(key)
        else:
            result = empty_gzip
    if isinstance(result, str):
        result = gzip.compress(result.encode("utf-8"))
    return Response(result, content_type="application/octet-stream")
def delete_value(value):
    key = get_key()
    if key in db.keys():
        stats = db[key]
        if value in stats.keys():
            if stats[value] != 0:
                stats[value] -= 1
                db[key] = stats
                return 'Deleted'
            else:
                return 'This parameter is already zero'
    else:
        return 'You have no losses today'
def _generate_leveldb(file_path, image_paths, targets, width, height):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this
    method writes paired out faces in an efficient way into this format.
    """
    print "\t\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 10000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(image_paths)):
        # Each image is a top level key with a keyname like 00000000011, in increasing
        # order starting from 00000000000.
        key = utils.get_key(idx)

        # Do common normalization that might happen across both testing and validation.
        try:
            image = _preprocess_data(_load_numpy_image(image_paths[idx], width, height))
        except:
            print "\t\t\tWarning: Unable to process leveldb image %s" % image_paths[idx]
            continue

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
        datum = Datum()
        datum.channels = 3  # RGB
        datum.height = height
        datum.width = width
        datum.data = image.tostring()
        datum.label = targets[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print "\t\t\tWrote batch, key: %s, time for batch: %d ms" % (key, total_time)
            start_time = int(round(time.time() * 1000))

    end_time = int(round(time.time() * 1000))
    total_time = end_time - start_time
    print "\t\t\tWriting final batch, time for batch: %d ms" % total_time
    wb.write()
    db.close()
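# The comments in these LevelDB writers describe keys like 00000000011: a zero-padded index so
# keys sort lexicographically in insertion order. utils.get_key isn't shown; a minimal sketch
# consistent with that description (an assumption, not the original helper):
def get_key(idx):
    """Zero-pad an integer index to an 11-character LevelDB key so keys sort in insertion order."""
    return '%011d' % idx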
def callback_inline(call):
    table_name = '{0}_{1.id}'.format(month, call.from_user)
    try:
        if call.message:
            if call.data == 'count':
                bot.send_message(
                    call.message.chat.id,
                    'Total: ' + str(count(table_name)) + ' entries')
            elif call.data == 'all':
                bot.send_message(call.message.chat.id, get_all(table_name))
            elif call.data in TRANSLATE.values():
                val = get_key(TRANSLATE, call.data)
                table = '{0}_{1}_{2.id}'.format(val, date.strftime('%Y'),
                                                call.from_user)
                try:
                    bot.send_message(call.message.chat.id, get_all(table))
                    bot.send_message(call.message.chat.id,
                                     'Total: ' + str(i_got(table)))
                except Exception as e:
                    logger.error('Query error. Nonexistent month ' + repr(e))
                    bot.send_message(call.message.chat.id,
                                     'Nothing for that month.. Pick another one!')
            elif call.data == '500':
                add_summ(500, table_name)
                bot.send_message(call.message.chat.id,
                                 'Five hundred... Well, that is not so bad :)')
            elif call.data == '1000':
                add_summ(1000, table_name)
                bot.send_message(call.message.chat.id, 'A thousand came in!')
            else:
                bot.send_message(call.message.chat.id,
                                 'Misha, let\'s start over! :( ')
            # remove inline buttons
            bot.edit_message_text(chat_id=call.message.chat.id,
                                  message_id=call.message.message_id,
                                  text='Nice!',
                                  reply_markup=None)
            # show alert
            bot.answer_callback_query(callback_query_id=call.id,
                                      show_alert=False,
                                      text="NOTED ;) ")
    except Exception as e:
        # print('callback_query error: ' + repr(e))
        logger.error('callback_query error: ' + repr(e))
def menu(menu_text: str, options: List[MenuOption[MenuReturn]]) -> MenuReturn:
    options_list: List[str] = []
    options_dict: Dict[str, MenuFunction[MenuReturn]] = {}
    for i, option in enumerate(options, start=1):
        options_list.append(f"{i}) {option[0]}")
        options_dict[str(i)] = option[1]
    options_text = '\n'.join(options_list)
    print(f"{clean_lines(menu_text)}\n",
          "\n"
          "Options available:\n"
          "\n"
          f"{clean_lines(options_text)}"
          "\n")
    key: str = get_key("Please choose an option: ",
                       lambda k: k in options_dict and k or None)
    return options_dict[key]()  # call the chosen option
def get_num_coll_time():
    """
    Function to get the number of collisions in a specified duration (can be specified as hour/day/month/year)
    :return:
    """
    result = 0
    if num_coll_time[0] == utils.HOUR:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.HOUR, int(num_coll_time[1])))
    elif num_coll_time[0] == utils.DAY:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.DAY,
                          utils.get_day(int(num_coll_time[1]))))
    elif num_coll_time[0] == utils.MONTH:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.MONTH,
                          utils.get_month(int(num_coll_time[1]))))
    elif num_coll_time[0] == utils.YEAR:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.YEAR, int(num_coll_time[1])))
    else:
        raise ValueError('Invalid type of duration: ', num_coll_time[0])
    return result
def post(self):
    args = self.reqparse.parse_args()
    token = args['token']
    move = args['move']
    register = game['register']
    status = game['status']
    player = utils.get_key(register, token)
    if player != -1:
        game['status'][player] = move
        if status['player1'] is not None and status['player2'] is not None:
            previousGame = utils.play(status['player1'], status['player2'])
            status['previousGame'] = previousGame
        print(game)
        return 1
    else:
        return -1
def ask(screen, question):
    """ask(screen, question) -> answer"""
    from utils import get_key
    pygame.font.init()
    current_string = ''
    display_box(screen, question + ": " + current_string)
    while 1:
        inkey = get_key()
        if inkey == K_BACKSPACE:
            current_string = current_string[0:-1]
        elif inkey == K_RETURN:
            break
        elif inkey == K_ESCAPE:
            return False
        elif inkey <= 127:
            current_string += chr(inkey)
        display_box(screen, question + ": " + current_string)
    return current_string
def _generate_leveldb(self, file_path, image, target, single_data):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this
    method writes paired out faces in an efficient way into this format.
    """
    print "\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 250000
    start_time = int(round(time.time() * 1000))
    # NOTE: assumes <image> and <target> are parallel lists of images and labels; the original
    # body referenced undefined <pairs> and <paired_image>, which looked like copy-paste residue.
    # <single_data> is currently unused.
    for idx in range(len(image)):
        # Each image is a top level key with a keyname like 00000000011, in increasing
        # order starting from 00000000000.
        key = utils.get_key(idx)

        # Do things like mean normalize, etc. that happen across both testing and validation.
        preprocessed_image = self._preprocess_data(image[idx])

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
        datum = Datum()
        # TODO(neuberg): Confirm that this is the correct way to setup RGB images for
        # Caffe for our dataset.
        datum.channels = 3
        datum.height = constants.HEIGHT
        datum.width = constants.WIDTH
        datum.data = preprocessed_image.tostring()
        datum.label = target[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print "Wrote batch, key: %s, time for batch: %d ms" % (key, total_time)
            start_time = int(round(time.time() * 1000))

    wb.write()
    db.close()
def _generate_leveldb(self, file_path, pairs, target):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this
    method writes paired out faces in an efficient way into this format.
    """
    print("Generating LevelDB file at %s ..." % file_path)
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 500
    start_time = int(round(time.time() * 1000))
    for i in range(len(pairs)):
        key = utils.get_key(i)
        image_1, image_2 = pairs[i]
        paired_image = np.concatenate([image_1, image_2])
        paired_image = self._preprocess_data(paired_image)
        datum = caffe.io.caffe_pb2.Datum()
        datum.channels = 2
        datum.height = constants.HEIGHT
        datum.width = constants.WIDTH
        datum.data = paired_image.tostring()
        datum.label = target[i]
        value = datum.SerializeToString()
        wb.put(key, value)
        if (i + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print("Wrote batch, key: %s, time for batch: %d ms" % (key, total_time))
            start_time = int(round(time.time() * 1000))
    wb.write()
    db.close()
def kowalski_analyze(browser_driver, item_name, item_info, buy_orders,
                     sell_orders, classified_url, current_key_price,
                     particle_effect):
    item_info[1] = utils.convert_currency(item_info[1], current_key_price)
    quickbuy_coefficient = -1
    so_ratio = -1
    traders_coefficient = -1
    lowest_so_to_bp_price = 1000
    if buy_orders[0] is not None and item_info[1] is not None:
        traders_coefficient = buy_orders[0][0] / float(item_info[1])
    if not sell_orders[0]:
        sell_orders[0] = 0
    if sell_orders[0] and buy_orders[0] and sell_orders[0] is not None and len(sell_orders[0]) != 0:
        if sell_orders[0][0] is not None and sell_orders[0][0]:
            quickbuy_coefficient = buy_orders[0][0] / sell_orders[0][0]
    if sell_orders[0] and sell_orders[0] is not None and len(sell_orders[0]) != 0:
        if len(sell_orders[0]) >= 2:
            if sell_orders[0][0] is not None and sell_orders[0][1] is not None:
                so_ratio = sell_orders[0][0] / sell_orders[0][1]
        if item_info[1] is not None:
            lowest_so_to_bp_price = sell_orders[0][0] / item_info[1]
    if traders_coefficient >= 0.75 >= so_ratio and lowest_so_to_bp_price < 1:  # quickbuy_coefficient >= 0.75 and
        effect_name = utils.get_key(config.particles_dict, int(particle_effect))
        print(f'{effect_name} {item_name}')
        print(f'url: {classified_url}')
        print(f'bo_count: {buy_orders[1]} | bo_prices: {buy_orders[0]}')
        print(f'so_count: {sell_orders[1]} | so_prices: {sell_orders[0]}')
        print(f'bp price: {item_info[1]}')
        print(f'quickbuy_coefficient: {quickbuy_coefficient}')
        print(f'traders_coefficient: {traders_coefficient}')
        print(f'lowest_so_to_bp_price: {lowest_so_to_bp_price}')
    else:
        print(datetime.datetime.now())
def evaluate(encoder, decoder, sentence, source_lang, target_lang):
    out_seq = []
    with torch.no_grad():
        in_tensor = sentence_to_tensor(sentence, source_lang, device)
        ctx_vec, outputs = encode_seq(encoder, in_tensor, device=device)
        input = torch.tensor([[Tokens.SOS.value]]).to(device=device)
        hidden = ctx_vec
        for idx in range(args.max_words):
            output, hidden = decoder(input, hidden, outputs)
            topv, topi = output.topk(1)
            if topi.item() == Tokens.EOS.value:
                break
            else:
                out_seq.append(get_key(target_lang.dict, topi.item()))
            input = topi.squeeze().detach()
    return out_seq
def enter_drive_mode(self):
    if self.connected_to_droid:
        print('\nPreparing for drive mode...\n')
        self.set_stance(1, _print=False)
        self.drive_mode = True
        print('\nControls:\n%s\n' % utils.get_drive_mode_controls_text())
        print('Ready for keyboard input...\n')
        speed, angle = 0, self.angle
        while True:
            key = utils.get_key()
            break_, speed, angle = self.process_key(key, speed, angle)
            if break_:
                break
        print('Exiting drive mode...\n')
        self.drive_mode = False
        self.set_stance(2, _print=False)
    else:
        print('You must connect to a droid before you can enter drive mode')
def get_sbg_keys(): return get_key('sbgkey')
def cluster(self, input_scores=None, infname=None, debug=False, reco_info=None, outfile=None, plotdir=''):
    if infname is None:
        assert input_scores is not None
    else:
        assert input_scores is None  # should only specify <input_scores> *or* <infname>
        input_scores = []
        with opener('r')(infname) as infile:
            reader = csv.DictReader(infile)
            for line in reader:
                input_scores.append(line)
    sorted_lines = sorted(input_scores, key=lambda k: float(k['score']))
    for line in sorted_lines:
        a_name = line['id_a']
        b_name = line['id_b']
        score = float(line['score'])
        from_same_event = -1 if (reco_info == None or a_name not in reco_info or b_name not in reco_info) else reco_info[a_name]['reco_id'] == reco_info[b_name]['reco_id']
        dbg_str_list = ['%22s %22s %8.3f %d' % (a_name, b_name, score, from_same_event), ]
        self.incorporate_into_clusters(a_name, b_name, score, dbg_str_list)
        self.pairscores[(utils.get_key((a_name, b_name)))] = score
        self.plotscores['all'].append(score)
        if reco_info != None:
            if from_same_event:
                self.plotscores['same'].append(score)
            else:
                self.plotscores['diff'].append(score)
        # if reco_info != None and reco_info[a_name]['reco_id'] == reco_info[b_name]['reco_id']:
        #     for query, score in {a_name: score, b_name: score}.iteritems():
        #         if query not in self.nearest_true_mate:
        #             self.nearest_true_mate[query] = score
        #         elif self.greater_than and score > self.nearest_true_mate[query]:
        #             self.nearest_true_mate[query] = score
        #         elif not self.greater_than and score < self.nearest_true_mate[query]:
        #             self.nearest_true_mate[query] = score
        if debug:
            outstr = ''.join(dbg_str_list)
            if outfile == None:
                print outstr
            else:
                outfile.write(outstr + '\n')

    if plotdir != '':
        utils.prep_dir(plotdir + '/plots', '*.svg')
        hists = {}
        for htype in ['all', 'same', 'diff']:
            hists[htype] = plotting.make_hist_from_list(self.plotscores[htype], htype + '_pairscores')
            hists[htype].SetTitle(htype)
        plotting.draw(hists['all'], 'float', plotdir=plotdir, plotname='pairscores', more_hists=[hists['same'], hists['diff']])
        check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
        check_call(['./bin/permissify-www', plotdir])

    for query, cluster_id in self.query_clusters.iteritems():
        if cluster_id not in self.id_clusters:
            self.id_clusters[cluster_id] = []
        self.id_clusters[cluster_id].append(query)
    for cluster_id, queries in self.id_clusters.items():
        if len(queries) == 1:
            self.singletons.append(queries[0])
    # print 'nearest', self.nearest_true_mate
    out_str_list = [' %d clusters:\n' % len(self.id_clusters), ]
    for cluster_id in self.id_clusters:
        out_str_list.append(' ' + ' '.join([str(x) for x in self.id_clusters[cluster_id]]) + '\n')
    if outfile == None:
        print ''.join(out_str_list)
    else:
        outfile.write(''.join(out_str_list))
def get_s3_keys(): return get_key('sbgs3key')
def get_access_keys():
    keys = get_key()
    # TODO: store with appropriate server, for now default to testportal
    keys['default']['server'] = 'https://testportal.4dnucleome.org'
    return keys
def read_hmm_output(self, algorithm, hmm_csv_outfname, make_clusters=True, count_parameters=False, parameter_out_dir=None, plotdir=None):
    print ' read output'
    if count_parameters:
        assert parameter_out_dir is not None
        assert plotdir is not None
    pcounter = ParameterCounter(self.germline_seqs) if count_parameters else None
    true_pcounter = ParameterCounter(self.germline_seqs) if (count_parameters and not self.args.is_data) else None
    perfplotter = PerformancePlotter(self.germline_seqs, plotdir + '/hmm/performance', 'hmm') if self.args.plot_performance else None
    n_processed = 0
    hmminfo = []
    with opener('r')(hmm_csv_outfname) as hmm_csv_outfile:
        reader = csv.DictReader(hmm_csv_outfile)
        last_key = None
        boundary_error_queries = []
        for line in reader:
            utils.intify(line, splitargs=('unique_ids', 'seqs'))
            ids = line['unique_ids']
            this_key = utils.get_key(ids)
            same_event = from_same_event(self.args.is_data, True, self.reco_info, ids)
            id_str = ''.join(['%20s ' % i for i in ids])

            # check for errors
            if last_key != this_key:  # if this is the first line for this set of ids (i.e. the best viterbi path or only forward score)
                if line['errors'] != None and 'boundary' in line['errors'].split(':'):
                    boundary_error_queries.append(':'.join([str(uid) for uid in ids]))
                else:
                    assert len(line['errors']) == 0

            if algorithm == 'viterbi':
                line['seq'] = line['seqs'][0]  # add info for the best match as 'seq'
                line['unique_id'] = ids[0]
                utils.add_match_info(self.germline_seqs, line, self.cyst_positions, self.tryp_positions, debug=(self.args.debug > 0))

                if last_key != this_key or self.args.plot_all_best_events:  # if this is the first line (i.e. the best viterbi path) for this query (or query pair), print the true event
                    n_processed += 1
                    if self.args.debug:
                        print '%s %d' % (id_str, same_event)
                    if line['cdr3_length'] != -1 or not self.args.skip_unproductive:  # if it's productive, or if we're not skipping unproductive rearrangements
                        hmminfo.append(dict([('unique_id', line['unique_ids'][0]), ] + line.items()))
                        if pcounter is not None:  # increment counters (but only for the best [first] match)
                            pcounter.increment(line)
                        if true_pcounter is not None:  # increment true counters
                            true_pcounter.increment(self.reco_info[ids[0]])
                        if perfplotter is not None:
                            perfplotter.evaluate(self.reco_info[ids[0]], line)

                if self.args.debug:
                    self.print_hmm_output(line, print_true=(last_key != this_key), perfplotter=perfplotter)
                line['seq'] = None
                line['unique_id'] = None
            else:  # for forward, write the pair scores to file to be read by the clusterer
                if not make_clusters:  # self.args.debug or
                    print '%3d %10.3f %s' % (same_event, float(line['score']), id_str)
                if line['score'] == '-nan':
                    print ' WARNING encountered -nan, setting to -999999.0'
                    score = -999999.0
                else:
                    score = float(line['score'])
                if len(ids) == 2:
                    hmminfo.append({'id_a': line['unique_ids'][0], 'id_b': line['unique_ids'][1], 'score': score})
                n_processed += 1

            last_key = utils.get_key(ids)

    if pcounter is not None:
        pcounter.write(parameter_out_dir)
        if not self.args.no_plot:
            pcounter.plot(plotdir, subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
    if true_pcounter is not None:
        true_pcounter.write(parameter_out_dir + '/true')
        if not self.args.no_plot:
            true_pcounter.plot(plotdir + '/true', subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
    if perfplotter is not None:
        perfplotter.plot()

    print ' processed %d queries' % n_processed
    if len(boundary_error_queries) > 0:
        print ' %d boundary errors (%s)' % (len(boundary_error_queries), ', '.join(boundary_error_queries))

    return hmminfo
# Level 0 classifiers
clfs = [
    ExtraTreesClassifier(**utils.read_estimator_params(s, "et")),
    LogisticRegression(**utils.read_estimator_params(s, "lr")),
    RandomForestClassifier(**utils.read_estimator_params(s, "rf"))
]

# First, run grid search (if enabled) to find the best estimator
results_1 = []
for clf in clfs:
    ts = time.time()
    clf_name = type(clf).__name__
    model = utils.find_best_estimator(clf, X_train, y_train, section=s)
    preds = model.predict_proba(X_valid)
    log_loss = metrics.log_loss(y_valid, preds)
    results_1.append((utils.get_key(clf_name), model, log_loss))
    logger.info("Trained {} in {:.2f} seconds, Log loss : {:.6f}"
                .format(clf_name, (time.time() - ts), log_loss))

# Sort by log_loss
results_1.sort(key=lambda tup: tup[2])
logger.info(tabulate(zip([r[0] for r in results_1],
                         [r[2] for r in results_1]),
                     floatfmt=".4f",
                     headers=("model", "log_loss")))
clfs = [clf[1] for clf in results_1]  # required for blending stage

# Next, run stacked generalization (blending)
logger.info("Start blending")
results_2 = []
for i in xrange(cfg[s]["n_blends"]):
    print("Iteration {}".format(i))
    bclf, b_t, log_loss = run_stacked_generalization(clfs, train, target)