def _determine_income_lt(original_ld2):
     if get_key(original_ld2, 'income.income_less_than_five_thousand'):
         return True
     elif get_key(original_ld2, 'income.income_five_thousand_or_more'):
         return False
     else:
         return None
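The get_key used above is not shown in this example; from the dotted-path argument it evidently reads a value out of a nested dict. A minimal sketch of such a helper, assuming that behaviour (the name, signature and default handling are not taken from the original project):

def get_key(data, dotted_path, default=None):
    # Walk a nested dict using a dotted path such as
    # 'income.income_less_than_five_thousand'.
    current = data
    for part in dotted_path.split('.'):
        if not isinstance(current, dict) or part not in current:
            return default
        current = current[part]
    return current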
Example #2
def get_avg_coll_rate():
    """
    Function to get the average number of collisions on an hourly/daily/monthly/yearly basis, as specified.
    :return: average number of collisions for the chosen duration type
    """
    result = 0
    if num_coll_time[0] == utils.HOUR:
        for i in range(0, 24):
            key = utils.get_key(utils.TIME, city, utils.HOUR, i)
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / 24
    elif num_coll_time[0] == utils.DAY:
        for i in range(0, 7):
            key = utils.get_key(utils.TIME, city, utils.DAY, utils.get_day(i))
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / 7
    elif num_coll_time[0] == utils.MONTH:
        for i in range(1, 13):
            key = utils.get_key(utils.TIME, city, utils.MONTH,
                                utils.get_month(i))
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / 12
    elif num_coll_time[0] == utils.YEAR:
        for i in range(utils.START_YEAR, utils.CURRENT_YEAR + 1):
            key = utils.get_key(utils.TIME, city, utils.YEAR, i)
            result = result + int(r.get(key) if r.exists(key) else 0)
        result = result / (utils.CURRENT_YEAR - utils.START_YEAR + 1)
    else:
        raise ValueError('Invalid type of duration: ', num_coll_time[0])

    return result
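The utils.get_key calls in this example appear to assemble a namespaced Redis key from a category (utils.TIME), the city, a duration type and a value, which r.get then looks up. A plausible sketch, assuming a simple delimiter-joined scheme rather than the project's actual format:

def get_key(*parts):
    # e.g. get_key('TIME', 'nyc', 'HOUR', 13) -> 'TIME:nyc:HOUR:13'
    return ':'.join(str(part) for part in parts)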
Example #3
def check_single_dup(ids_dict, row, local_clips_path):
    print(
        "------------------------------------------------------------------------"
    )
    print('UID 0:', row['uid_1'].to_string(index=False))
    print('UID 1:', row['uid_2'].to_string(index=False))
    print()
    path_0 = get_key(row['uid_1'].to_string(index=False).strip(), ids_dict)
    path_1 = get_key(row['uid_2'].to_string(index=False).strip(), ids_dict)
    print('Path 0:', path_0)
    print('Path 1:', path_1)
    print()
    print('IS PSFIX' if row['is_psfix'].to_string(
        index=False).strip() == '1' else 'NOT PSFIX')
    print()
    print('Name distance:', row['name_dist'].to_string(index=False))
    print()
    print('Sound Distance:', row['sound_dist'].to_string(index=False))
    while True:
        play_songs(row['uid_1'].to_string(index=False).strip(),
                   row['uid_2'].to_string(index=False).strip(),
                   local_clips_path)
        keep = user_confirmation(
            "Enter 1 to keep one song (duplicates), 2 to keep both (not duplicates), or anything else to repeat"
        )
        if keep not in ['1', '2']:
            continue
        # TO DO: Change to automatic extraction of which to keep
        if keep == '1':
            keep = longer_path(path_0, path_1)

        return int(keep)
Example #4
def solve(problem, weight):
    start_node = Node(problem.start_state)
    start_node_key = get_key(start_node.state)
    start_node_value = start_node.path_cost + weight * problem.get_heuristic(
        start_node.state)

    open_list = OpenList()
    open_list.add(start_node, start_node_value)

    closed_set = set()
    best_value = float('inf')

    generated_states = set()
    generated_states.add(start_node_key)

    path_costs = {start_node_key: start_node.path_cost}

    while open_list:
        current_node = open_list.remove()
        current_node_value = current_node.path_cost + problem.get_heuristic(
            current_node.state)

        if current_node_value < best_value:
            current_node_key = get_key(current_node.state)
            closed_set.add(current_node_key)

            for child_node in get_children_nodes(problem, current_node):
                child_node_value = child_node.path_cost + problem.get_heuristic(
                    child_node.state)
                child_node_key = get_key(child_node.state)

                if child_node_value < best_value:
                    if problem.is_goal(child_node.state):
                        path_costs[child_node_key] = child_node.path_cost
                        best_value = child_node_value

                        solution = get_solution(child_node)
                        cost = len(solution)

                        print({'cost': cost, 'solution': solution})
                    elif child_node_key in closed_set or child_node in open_list:
                        if path_costs[child_node_key] > child_node.path_cost:
                            path_costs[child_node_key] = child_node.path_cost
                            value = path_costs[
                                child_node_key] + weight * problem.get_heuristic(
                                    child_node.state)

                            if child_node_key in closed_set:
                                closed_set.remove(child_node_key)

                            open_list.add(child_node, value)
                    else:
                        path_costs[child_node_key] = child_node.path_cost
                        value = path_costs[
                            child_node_key] + weight * problem.get_heuristic(
                                child_node.state)
                        open_list.add(child_node, value)

    return None
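This weighted search keeps going after it first reaches a goal and prints every strictly better solution it encounters, so it behaves like an anytime weighted A*. It relies on a get_key(state) helper (not shown) that turns a state into a hashable identifier for closed_set and path_costs; a minimal sketch under the assumption that a state is a dict, as in the puzzle heuristic further down:

def get_key(state):
    # Hashable, order-independent key for a dict-based state.
    return tuple(sorted(state.items()))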
Example #5
def detect_poss_duplicates(ids_dict,
                           params_path_data,
                           params_list_data,
                           n_neighbors=5):

    curr_params_data = unpack_params(params_path_data, params_list_data)

    load_from_data = 'small_dataset'
    for i in params_list_data:
        load_from_data += '_' + str(i)
    load_from_data += '.csv'
    load_from_data = os.path.join(params_path_data[-1], load_from_data)

    save_to = 'possible_duplicates'
    save_to += '_data_(' + str(params_list_data[0])
    for i in params_list_data[1:]:
        save_to += '_' + str(i)
    save_to += ').csv'

    print("Creating possible duplicates file...")
    print("Using dataset parameters: " + str(curr_params_data))
    print("Saving at: " + save_to)
    print("Loading dataset from: " + load_from_data)

    df_dups = pd.read_csv(load_from_data, names=['UniqueID', 'f1', 'f2'])
    matrix_2d = df_dups.values[:, 1:]

    neigh = NearestNeighbors(n_neighbors=n_neighbors)
    neigh.fit(matrix_2d)
    dist_mat, idx_mat = neigh.kneighbors(matrix_2d)

    df_dups['name'] = [
        os.path.split(get_key(df_dups.iloc[j, 0], ids_dict))[1]
        for j in range(idx_mat.shape[0])
    ]

    for i in range(n_neighbors):
        df_dups['uid_' + str(i)] = [df_dups.iloc[x, 0] for x in idx_mat[:, i]]
        df_dups['name_' + str(i)] = [
            os.path.split(get_key(df_dups.iloc[idx_mat[j, i], 0], ids_dict))[1]
            for j in range(idx_mat.shape[0])
        ]
        df_dups['sound_dist_' + str(i)] = dist_mat[:, i]
        df_dups['name_dist_' + str(i)] = [
            -jellyfish.jaro_winkler(df_dups['name'][j],
                                    df_dups['name_' + str(i)][j]) + 1
            for j in range(idx_mat.shape[0])
        ]
        df_dups['is_psfix_' + str(i)] = [
            is_psfix(df_dups['name'][j], df_dups['name_' + str(i)][j])
            for j in range(idx_mat.shape[0])
        ]

    to_drop = ['f1', 'f2']
    df_dups = df_dups.drop(to_drop, axis=1)
    df_dups.to_csv(save_to, index=False, header=True, encoding='utf-8')
Example #6
 def _select_key(self, line):
     """ Determine key-value split of items """
     for key in self.key_exceptions:
         if line.startswith(key):
             return key
     for key_length, items in self.key_length.items():
         for item in items:
             if line.startswith(item):
                 return get_key(line, key_length)
     return get_key(line, len(line.split())-1)
Example #7
 def update(self, obj, attributes=None):
     """ Make values from a given object available. """
     for attr in attributes:
         value = getattr(obj, attr)
         if callable(value):
             value = value()
         self.update_data[get_key(obj, attr)] = value
Example #8
def transform(message, data):
    logging.info("Transforming: message=[%s] data=[%s]" % (message, data))

    t_data = data
    storage = message['storage']
    data_type = message['type']

    out_type = output_type(data_type, storage)
    logging.info("output type: %s" % out_type)

    if out_type == 'yaml':
        t_data = yaml.dump(data)

    elif out_type == 'json':
        t_data = json.dumps(data)

    message['app'] = message['name']
    message['data'] = t_data

    # flavor spring?
    if utils.has_key('spring.profiles', data):
        spring_profile = utils.get_key('spring.profiles', data)
        n_pattern = "%s/%s" if storage == 'vault' else "%s-%s"
        message['name'] = n_pattern % (message['name'], spring_profile)

    return message
Example #9
 def get_pairs(self, preclusters=None):
     """ Get all unique the pairs of sequences in input_info, skipping where preclustered out """
     all_pairs = itertools.combinations(self.input_info.keys(), 2)
     if preclusters == None:
         print '    ?? lines (no preclustering)'  # % len(list(all_pairs)) NOTE I'm all paranoid the list conversion will be slow (although it doesn't seem to be a.t.m.)
         return all_pairs
     else:  # if we've already run preclustering, skip the pairs that we know aren't matches
         preclustered_pairs = []
         n_lines, n_preclustered, n_previously_preclustered, n_removable, n_singletons = 0, 0, 0, 0, 0
         for a_name, b_name in all_pairs:
             key = utils.get_key((a_name, b_name))
             # NOTE shouldn't need this any more:
             if a_name not in preclusters.query_clusters or b_name not in preclusters.query_clusters:  # singletons (i.e. they were already preclustered into their own group)
                 n_singletons += 1
                 continue
             if key not in preclusters.pairscores:  # preclustered out in a previous preclustering step
                 n_previously_preclustered += 1
                 continue
             if preclusters.query_clusters[a_name] != preclusters.query_clusters[b_name]:  # not in same cluster
                 n_preclustered += 1
                 continue
             if preclusters.is_removable(preclusters.pairscores[key]):  # in same cluster, but score (link) is long. i.e. *this* pair is far apart, but other seqs to which they are linked are close to each other
                 n_removable += 1
                 continue
             preclustered_pairs.append((a_name, b_name))
             n_lines += 1
         print '    %d lines (%d preclustered out, %d removable links, %d singletons, %d previously preclustered)' % (n_lines, n_preclustered, n_removable, n_singletons, n_previously_preclustered)
         return preclustered_pairs
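utils.get_key((a_name, b_name)) has to produce a stable identifier for the pair, since the same key is later used to look the pair up in preclusters.pairscores. One simple possibility, assuming an order-independent join of the two names (the real helper may differ):

def get_key(names):
    # Identifier for a pair of sequence names, independent of their order.
    return ':'.join(sorted(str(name) for name in names))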
Example #10
def song_subset(audio_path, server_subpaths='All', ids_dict=None):

    s_subset = []

    if server_subpaths == 'All':
        for root, dirs, files in os.walk(audio_path):
            for name in files:
                filedir = os.path.join(root, name)
                filename, file_extension = os.path.splitext(filedir)
                unique_id, _ = os.path.splitext(name)
                if file_extension in ['.wav']:
                    s_subset.append((unique_id, filedir))
        return set(s_subset)

    elif server_subpaths == 'FINAL_East African Popular Music Archive' and ids_dict != None:
        for root, dirs, files in os.walk(audio_path):
            for name in files:
                filedir = os.path.join(root, name)
                filename, file_extension = os.path.splitext(filedir)
                unique_id, _ = os.path.splitext(name)
                key = get_key(unique_id, ids_dict)
                if "2_EastAfricanArchive" in key and "FINAL_East African Popular Music Archive" not in key:
                    continue
                elif file_extension in ['.wav']:
                    s_subset.append((unique_id, filedir))
        return set(s_subset)

    elif server_subpaths == 'Remove duplicates':
        pass
Example #11
File: base.py Project: APSL/Adjax
 def update(self, obj, attributes=None):
     """ Make values from a given object available. """
     for attr in attributes:
         value = getattr(obj, attr)
         if callable(value):
             value = value()
         self.update_data[get_key(obj, attr)] = value
Example #12
 def get_pairs(self, preclusters=None):
     """ Get all the unique pairs of sequences in input_info, skipping those that were preclustered out """
     all_pairs = itertools.combinations(self.input_info.keys(), 2)
     if preclusters == None:
         print '    ?? lines (no preclustering)'  # % len(list(all_pairs)) NOTE I'm all paranoid the list conversion will be slow (although it doesn't seem to be a.t.m.)
         return all_pairs
     else:  # if we've already run preclustering, skip the pairs that we know aren't matches
         preclustered_pairs = []
         n_lines, n_preclustered, n_previously_preclustered, n_removable, n_singletons = 0, 0, 0, 0, 0
         for a_name, b_name in all_pairs:
             key = utils.get_key((a_name, b_name))
             # NOTE shouldn't need this any more:
             if a_name not in preclusters.query_clusters or b_name not in preclusters.query_clusters:  # singletons (i.e. they were already preclustered into their own group)
                 n_singletons += 1
                 continue
             if key not in preclusters.pairscores:  # preclustered out in a previous preclustering step
                 n_previously_preclustered += 1
                 continue
             if preclusters.query_clusters[
                     a_name] != preclusters.query_clusters[
                         b_name]:  # not in same cluster
                 n_preclustered += 1
                 continue
             if preclusters.is_removable(
                     preclusters.pairscores[key]
             ):  # in same cluster, but score (link) is long. i.e. *this* pair is far apart, but other seqs to which they are linked are close to each other
                 n_removable += 1
                 continue
             preclustered_pairs.append((a_name, b_name))
             n_lines += 1
         print '    %d lines (%d preclustered out, %d removable links, %d singletons, %d previously preclustered)' % (
             n_lines, n_preclustered, n_removable, n_singletons,
             n_previously_preclustered)
         return preclustered_pairs
Example #13
def find_extremum_time(begin, end, duration_type, extremum_type, dur_func):
    """
    Function to find the duration (of the given duration type) during which the number of collisions is at its minimum or maximum
    :param begin:
    :param end:
    :param duration_type:
    :param extremum_type:
    :param dur_func:
    :return:
    """
    max = sys.maxsize
    min = -sys.maxsize - 1
    result = 0
    if extremum_type == utils.DANGEROUS:
        result = min
    elif extremum_type == utils.SAFEST:
        result = max
    else:
        raise ValueError('Invalid type of extremum type: ', extremum_type)
    dur = dur_func(1)
    for i in range(begin, end):
        curr_dur = dur_func(i)
        key = utils.get_key(utils.TIME, city, duration_type, curr_dur)
        curr_val = int(r.get(key) if r.exists(key) else 0)
        if extremum_type == utils.DANGEROUS:
            if curr_val > result:
                result = curr_val
                dur = curr_dur
        else:
            if curr_val < result:
                result = curr_val
                dur = curr_dur

    return dur, result
Example #14
def get_stat(value):
    clean_db(db)
    stats = {'lose': 0, 'victory': 0}
    if value == 'month':
        date = datetime.datetime.now()
        keys = db.prefix(str(date.month) + str(date.year))
        for key in keys:
            for key_ in db[key].keys():
                stats[key_] += db[key][key_]
        winrate = int(stats['victory']) / \
            (int(stats['victory']) + int(stats['lose']))
        return f'''Your stats for this month:
    - Victories: {stats['victory']}
    - Losses: {stats['lose']}
    - Winrate: {round(winrate, 2) * 100} %
    '''
    else:
        key = get_key()
        if key in db.keys():
            for key_ in db[key]:
                stats[key_] += db[key][key_]
            winrate = int(stats['victory']) / \
                (int(stats['victory']) + int(stats['lose']))
            return f'''Your stats for this day:
        - Victories: {stats['victory']}
        - Losses: {stats['lose']}
        - Winrate: {round(winrate, 2) * 100} %
        '''
        else:
            return 'You have no stats for today'
Example #15
 def _postprocess_ld2(transformed_ld2, original_ld2):
     _transformed_ld2 = transformed_ld2.copy()
     _transformed_ld2['report_type']['quarter'] = _determine_quarter(original_ld2)
     _transformed_ld2['expense_reporting_method'] = _determine_expense_method(original_ld2)
     if get_key(original_ld2, 'report_type.no_activity'):
         _transformed_ld2['lobbying_activities'] = []
     _transformed_ld2['expense_less_than_five_thousand'] = _determine_expense_lt(original_ld2)
     _transformed_ld2['income_less_than_five_thousand'] = _determine_income_lt(original_ld2)
     return _transformed_ld2
Example #16
def add_value(value):
    key = get_key()
    if key in db.keys():
        stats = db[key]
        if value in stats.keys():
            stats[value] += 1
        else:
            stats[value] = 1
        db[key] = stats
    else:
        db[key] = {value: 1}
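Here and in Example #14, get_key() takes no arguments and returns the per-day key under which the stats dict is stored, while get_stat scans a whole month with db.prefix(str(date.month) + str(date.year)). One plausible sketch consistent with that prefix lookup (the exact key format is an assumption):

import datetime

def get_key():
    # Today's key, prefixed by month and year so that
    # db.prefix(str(month) + str(year)) matches every day of the month.
    now = datetime.datetime.now()
    return '{}{}{}'.format(now.month, now.year, now.day)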
Example #17
    def get(self):
        args = self.reqparse.parse_args()
        token = args['token']
        register = game['register']
        for player in register:
            if register[player] == token:
                player_number = utils.get_key(register, token)
                win_number = game['wins'][player_number]
                return win_number

        else:
            return -1
Example #18
def get_article_urls(query='', page=0):
    yielded = []
    url = 'https://www.hmetro.com.my/search?s={}{}'.format(
        query, ''
        if page == 0 or not isinstance(page, int) else '&page={}'.format(page))
    for url in filter(
            map(filter(open_soup(url).find_all('a'), has_key('href')),
                get_key('href')), text_match(r'^/.+?/\d{4}/\d{2}/\d{6}/.+$')):
        url = 'https://www.hmetro.com.my{}'.format(url)
        if url in yielded: continue
        yielded.append(url)
        yield page, url, get_article(open_soup(url))
Example #19
    def single_link(self, input_scores=None, infname=None, debug=False, reco_info=None, outfile=None):
        if infname is None:
            assert input_scores is not None
        else:
            assert input_scores is None  # should only specify <input_scores> *or* <infname>
            input_scores = []
            with opener('r')(infname) as infile:
                reader = csv.DictReader(infile)
                for line in reader:
                    input_scores.append(line)
        sorted_lines = sorted(input_scores, key=lambda k: float(k['logprob']))
        for line in sorted_lines:
            a_name = line['id_a']
            b_name = line['id_b']
            score = float(line['logprob'])
            dbg_str_list = ['%22s %22s   %8.3f' % (a_name, b_name, score), ]
            if reco_info is None:
                dbg_str_list[-1] += '   %s' % ('-')
            else:
                from_same_event = utils.from_same_event(reco_info, [a_name, b_name])
                dbg_str_list[-1] += '   %d' % (from_same_event)
            self.incorporate_into_clusters(a_name, b_name, score, dbg_str_list)
            self.pairscores[(utils.get_key((a_name, b_name)))] = score
            self.plotscores['all'].append(score)
            if reco_info is not None:
                if from_same_event:
                    self.plotscores['same'].append(score)
                else:
                    self.plotscores['diff'].append(score)
            if debug:
                outstr = ''.join(dbg_str_list)
                if outfile == None:
                    print outstr
                else:
                    outfile.write(outstr + '\n')

        for query, cluster_id in self.query_clusters.iteritems():
            if cluster_id not in self.id_clusters:
                self.id_clusters[cluster_id] = []
            self.id_clusters[cluster_id].append(query)
        for cluster_id, queries in self.id_clusters.items():
            if len(queries) == 1:
                self.singletons.append(queries[0])

        # print 'nearest',self.nearest_true_mate
        out_str_list = ['  %d clusters:\n'%len(self.id_clusters), ]
        for cluster_id in self.id_clusters:
            out_str_list.append('   ' + ' '.join([str(x) for x in self.id_clusters[cluster_id]]) + '\n')
        if outfile == None:
            print ''.join(out_str_list)
        else:
            outfile.write(''.join(out_str_list))
Example #20
def solve(problem):
    start_node = Node(problem.start_state)
    start_node_value = start_node.path_cost + problem.get_heuristic(
        start_node.state)

    open_list = OpenList()
    open_list.add(start_node, start_node_value)

    closed_list = set()

    while open_list:
        current_node = open_list.remove()

        if problem.is_goal(current_node.state):
            return get_solution(current_node)

        current_node_key = get_key(current_node.state)
        closed_list.add(current_node_key)

        for child_node in get_children_nodes(problem, current_node):
            child_node_key = get_key(child_node.state)

            if child_node_key not in closed_list and child_node not in open_list:
                child_node_value = child_node.path_cost + problem.get_heuristic(
                    child_node.state)
                open_list.add(child_node, child_node_value)
            elif child_node in open_list:
                stored_child_node = open_list[child_node]

                child_node_value = child_node.path_cost + problem.get_heuristic(
                    child_node.state)
                stored_child_node_value = stored_child_node.path_cost + problem.get_heuristic(
                    stored_child_node.state)

                if child_node_value < stored_child_node_value:
                    del open_list[stored_child_node]
                    open_list.add(child_node, child_node_value)

    return None
Example #21
def linear_conflict(state, final_state):
    ret = 0
    size = len(state)
    keys = list(state.keys())
    for index, key_j in enumerate(keys):
        if index + 1 < size:
            key_k = keys[index + 1]
            if state[key_j] != 0 and state[key_k] != 0 and key_k[1] == key_j[1]:
                goal_key_j = get_key(final_state, state[key_j])
                goal_key_k = get_key(final_state, state[key_k])
                if key_j[1] == goal_key_j[1] and key_k[1] == goal_key_k[
                        1] and state[key_j] > state[key_k]:
                    ret += 1
            key_k = (key_k[1], key_k[0])
            key_j = (key_j[1], key_j[0])
            if state[key_j] != 0 and state[key_k] != 0 and key_k[0] == key_j[0]:
                goal_key_j = get_key(final_state, state[key_j])
                goal_key_k = get_key(final_state, state[key_k])
                if key_j[0] == goal_key_j[0] and key_k[0] == goal_key_k[
                        0] and state[key_j] > state[key_k]:
                    ret += 1
    return manhattan(state, final_state) + 2 * ret
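linear_conflict leans on two helpers that are not shown here: get_key(final_state, value), which finds the (row, column) position holding a given tile in the goal state, and manhattan, the base distance heuristic it adds the conflict penalty to. A minimal sketch, assuming a state is a dict mapping (row, column) tuples to tile values with 0 as the blank:

def get_key(state, value):
    # Position of a given tile value in a dict-based puzzle state.
    for position, tile in state.items():
        if tile == value:
            return position
    return None

def manhattan(state, final_state):
    # Sum of horizontal and vertical distances of every non-blank tile
    # from its goal position.
    total = 0
    for position, tile in state.items():
        if tile != 0:
            goal = get_key(final_state, tile)
            total += abs(position[0] - goal[0]) + abs(position[1] - goal[1])
    return total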
Example #22
def get_num_coll_loc():
    """
    Function to get the number of collisions at the specified location (off street, on street, or cross street)
    :return:
    """
    result = 0
    if num_coll_loc[0] == utils.OFF_STREET:
        result = r.get(
            utils.get_key(utils.LOCATION, city, utils.OFF_STREET,
                          num_coll_loc[1]))
    elif num_coll_loc[0] == utils.ON_STREET:
        result = r.get(
            utils.get_key(utils.LOCATION, city, utils.ON_STREET,
                          num_coll_loc[1]))
    elif num_coll_loc[0] == utils.CROSS_STREET:
        result = r.get(
            utils.get_key(utils.LOCATION, city, utils.CROSS_STREET,
                          num_coll_loc[1]))
    else:
        raise ValueError('Invalid type of location: ', num_coll_loc[0])

    return result
Example #23
 def main(self):
     # get the api key
     self.api_key = get_key()
     # initialize the data file
     self.init_data_file()
     # initialize the addresses
     self.init_addresses()
     # initialize the bloom filter
     self.init_bf()
     # open the connection and start recieving data
     asyncio.get_event_loop().run_until_complete(self.listen())
     # keep listening
     asyncio.get_event_loop().run_forever()
     logger.info('main end')
Example #24
def update(word: str, lives: int, guessed: List[str]) -> int:
    paint(guessed)
    key = get_key()
    if key.isalpha():
        positions = find_all_elements(word, key)
        if not positions:
            lives = lost_life(lives)
        else:
            for i in positions:
                guessed[i] = key

    clear_screen()

    return lives
Example #25
File: main.py Project: rettier/c
def get():
    key = get_key()
    result = process_custom_command(key)

    if result is False:
        if storage_backend.has_key(key):
            result = storage_backend.get(key)
        else:
            result = empty_gzip

    if isinstance(result, str):
        result = gzip.compress(result.encode("utf-8"))

    return Response(result, content_type="application/octet-stream")
Example #26
def delete_value(value):
    key = get_key()
    if key in db.keys():
        stats = db[key]
        if value in stats.keys():
            if stats[value] != 0:
                stats[value] -= 1
                db[key] = stats
            return 'Deleted'
        else:
            return 'This parameter is clear'

    else:
        return 'You have no losses recorded today'
Example #27
def _generate_leveldb(file_path, image_paths, targets, width, height):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this method
    writes paired out faces in an efficient way into this format.
    """
    print "\t\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 10000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(image_paths)):
        # Each image is a top level key with a keyname like 00000000011, in increasing
        # order starting from 00000000000.
        key = utils.get_key(idx)

        # Do common normalization that might happen across both testing and validation.
        try:
            image = _preprocess_data(
                _load_numpy_image(image_paths[idx], width, height))
        except:
            print "\t\t\tWarning: Unable to process leveldb image %s" % image_paths[
                idx]
            continue

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
        datum = Datum()
        datum.channels = 3  # RGB
        datum.height = height
        datum.width = width
        datum.data = image.tostring()
        datum.label = targets[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print "\t\t\tWrote batch, key: %s, time for batch: %d ms" % (
                key, total_time)
            start_time = int(round(time.time() * 1000))

    end_time = int(round(time.time() * 1000))
    total_time = end_time - start_time
    print "\t\t\tWriting final batch, time for batch: %d ms" % total_time
    wb.write()
    db.close()
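The comment above pins down what utils.get_key(idx) does here: it turns the running index into a fixed-width key such as 00000000011 so LevelDB keeps the records in insertion order. A sketch of that helper (zero-padding width inferred from the example keyname):

def get_key(index):
    # '00000000011' style key: 11 digits, zero-padded.
    return '%011d' % index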
Example #28
def callback_inline(call):
    table_name = '{0}_{1.id}'.format(month, call.from_user)
    try:
        if call.message:
            if call.data == 'count':
                bot.send_message(
                    call.message.chat.id,
                    'Всего: ' + str(count(table_name)) + ' движений')
            elif call.data == 'all':
                bot.send_message(call.message.chat.id, get_all(table_name))

            elif call.data in TRANSLATE.values():
                val = get_key(TRANSLATE, call.data)
                table = '{0}_{1}_{2.id}'.format(val, date.strftime('%Y'),
                                                call.from_user)
                try:
                    bot.send_message(call.message.chat.id, get_all(table))
                    bot.send_message(call.message.chat.id,
                                     'Всего: ' + str(i_got(table)))
                except Exception as e:
                    logger.error('Ошибка запроса. Несуществующий месяц ' +
                                 repr(e))
                    bot.send_message(call.message.chat.id,
                                     'В том месяце голяк.. Выбери другой!')

            elif call.data == '500':
                add_summ(500, table_name)
                bot.send_message(call.message.chat.id,
                                 'Пятиха... Ну не так уж и плохо :)')
            elif call.data == '1000':
                add_summ(1000, table_name)
                bot.send_message(call.message.chat.id, 'Касарик прилетел!')

            else:
                bot.send_message(call.message.chat.id,
                                 'Миша, давай нпоновой! :( ')

            # remove inline buttons
            bot.edit_message_text(chat_id=call.message.chat.id,
                                  message_id=call.message.message_id,
                                  text='Найс!',
                                  reply_markup=None)
            # show alert
            bot.answer_callback_query(callback_query_id=call.id,
                                      show_alert=False,
                                      text="Я ЗАПОМНИЛ ;) ")
    except Exception as e:
        # print('Ошибка callback_query: ' + repr(e))
        logger.error('Ошибка callback_query: ' + repr(e))
Example #29
def menu(menu_text: str, options: List[MenuOption[MenuReturn]]) -> MenuReturn:
    options_list: List[str] = []
    options_dict: Dict[str, MenuFunction[MenuReturn]] = {}
    for i, option in enumerate(options, start=1):
        options_list.append(f"{i}) {option[0]}")
        options_dict[str(i)] = option[1]
    options_text = '\n'.join(options_list)
    print(f"{clean_lines(menu_text)}\n", "\n"
          "Options available:\n"
          "\n"
          f"{clean_lines(options_text)}"
          "\n")
    key: str = get_key("Please choose an option: ",
                       lambda k: k in options_dict and k or None)
    return options_dict[key]()  # call the chosen option
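In this menu the get_key helper takes a prompt and a validator and keeps asking until the validator accepts the input, returning the validated value that is then used to index options_dict. A minimal sketch of that behaviour (assumed, not the project's actual implementation):

def get_key(prompt, validator):
    # Prompt repeatedly until the validator returns a non-None value.
    while True:
        value = validator(input(prompt).strip())
        if value is not None:
            return value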
Example #30
def get_num_coll_time():
    """
    Function to get the number of collisions in the specified duration (hour, day, month, or year)
    :return:
    """
    result = 0
    if num_coll_time[0] == utils.HOUR:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.HOUR, int(num_coll_time[1])))
    elif num_coll_time[0] == utils.DAY:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.DAY,
                          utils.get_day(int(num_coll_time[1]))))
    elif num_coll_time[0] == utils.MONTH:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.MONTH,
                          utils.get_month(int(num_coll_time[1]))))
    elif num_coll_time[0] == utils.YEAR:
        result = r.get(
            utils.get_key(utils.TIME, city, utils.YEAR, int(num_coll_time[1])))
    else:
        raise ValueError('Invalid type of duration: ', num_coll_time[0])

    return result
Example #31
 def post(self):
     args = self.reqparse.parse_args()
     token = args['token']
     move = args['move']
     register = game['register']
     status = game['status']
     player = utils.get_key(register, token)
     if player != -1:
         game['status'][player] = move
         if status['player1'] != None and status['player2'] != None:
             previousGame = utils.play(status['player1'], status['player2'])
             status['previousGame'] = previousGame
         print(game)
         return 1
     else:
         return -1
Example #32
def _generate_leveldb(file_path, image_paths, targets, width, height):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this method
    writes paired out faces in an efficient way into this format.
    """
    print "\t\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 10000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(image_paths)):
      # Each image is a top level key with a keyname like 00000000011, in increasing
      # order starting from 00000000000.
      key = utils.get_key(idx)

      # Do common normalization that might happen across both testing and validation.
      try:
        image = _preprocess_data(_load_numpy_image(image_paths[idx], width, height))
      except:
        print "\t\t\tWarning: Unable to process leveldb image %s" % image_paths[idx]
        continue

      # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
      datum = Datum()
      datum.channels = 3 # RGB
      datum.height = height
      datum.width = width
      datum.data = image.tostring()
      datum.label = targets[idx]
      value = datum.SerializeToString()
      wb.put(key, value)

      if (idx + 1) % commit_every == 0:
        wb.write()
        del wb
        wb = db.write_batch()
        end_time = int(round(time.time() * 1000))
        total_time = end_time - start_time
        print "\t\t\tWrote batch, key: %s, time for batch: %d ms" % (key, total_time)
        start_time = int(round(time.time() * 1000))

    end_time = int(round(time.time() * 1000))
    total_time = end_time - start_time
    print "\t\t\tWriting final batch, time for batch: %d ms" % total_time
    wb.write()
    db.close()
Example #33
def ask(screen, question):
    """ask(screen, question) -> answer"""
    from utils import get_key

    pygame.font.init()
    current_string = ''
    display_box(screen, question + ": " + current_string)
    while 1:
        inkey = get_key()
        if inkey == K_BACKSPACE:
            current_string = current_string[0:-1]
        elif inkey == K_RETURN:
            break
        elif inkey == K_ESCAPE:
            return False
        elif inkey <= 127:
            current_string += chr(inkey)
        display_box(screen, question + ": " + current_string)
    return current_string
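The get_key imported from utils blocks until a key is pressed and returns the pygame key code that the loop then compares against K_BACKSPACE, K_RETURN and K_ESCAPE. A common way to write such a helper (a sketch, not necessarily the project's version):

import pygame

def get_key():
    # Block until a key is pressed and return its pygame key constant.
    while True:
        event = pygame.event.wait()
        if event.type == pygame.KEYDOWN:
            return event.key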
Example #34
def _generate_leveldb(self, file_path, image, target, single_data):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this method
    writes paired out faces in an efficient way into this format.
    """
    print "\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 250000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(pairs)):
      # Each image is a top level key with a keyname like 00000000011, in increasing
      # order starting from 00000000000.
      key = utils.get_key(idx)

      # Do things like mean normalize, etc. that happen across both testing and validation.
      paired_image = self._preprocess_data(paired_image)

      # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
      datum = Datum()
      # TODO(neuberg): Confirm that this is the correct way to setup RGB images for
      # Caffe for our dataset.
      datum.channels = 3
      datum.height = constants.HEIGHT
      datum.width = constants.WIDTH
      datum.data = image.tostring()
      datum.label = target[idx]
      value = datum.SerializeToString()
      wb.put(key, value)

      if (idx + 1) % commit_every == 0:
        wb.write()
        del wb
        wb = db.write_batch()
        end_time = int(round(time.time() * 1000))
        total_time = end_time - start_time
        print "Wrote batch, key: %s, time for batch: %d ms" % (key, total_time)
        start_time = int(round(time.time() * 1000))

    wb.write()
    db.close()
Example #35
    def _generate_leveldb(self, file_path, pairs, target):
        """
        Caffe uses the LevelDB format to efficiently load its training and validation data; this method
        writes paired out faces in an efficient way into this format.
        """

        print("Generating LevelDB file at %s ..." % file_path)
        shutil.rmtree(file_path, ignore_errors=True)
        db = plyvel.DB(file_path, create_if_missing=True)
        wb = db.write_batch()
        commit_every = 500
        start_time = int(round(time.time() * 1000))

        for i in range(len(pairs)):
            key = utils.get_key(i)

            image_1, image_2 = pairs[i]
            paired_image = np.concatenate([image_1, image_2])

            paired_image = self._preprocess_data(paired_image)

            datum = caffe.io.caffe_pb2.Datum()
            datum.channels = 2
            datum.height = constants.HEIGHT
            datum.width = constants.WIDTH
            datum.data = paired_image.tostring()
            datum.label = target[i]

            value = datum.SerializeToString()
            wb.put(key, value)

            if (i + 1) % commit_every == 0:
                wb.write()
                del wb
                wb = db.write_batch()
                end_time = int(round(time.time() * 1000))
                total_time = end_time - start_time
                print("Wrote batch, key: %s, time for batch: %d ms" % (key, total_time))
                start_time = int(round(time.time() * 1000))

        wb.write()
        db.close()
Example #36
def kowalski_analyze(
        browser_driver,
        item_name,
        item_info,
        buy_orders,
        sell_orders,
        classified_url,
        current_key_price,
        particle_effect):
    item_info[1] = utils.convert_currency(item_info[1], current_key_price)

    quickbuy_coefficient = -1
    so_ratio = -1
    traders_coefficient = -1
    lowest_so_to_bp_price = 1000

    if buy_orders[0] is not None and item_info[1] is not None:
        traders_coefficient = buy_orders[0][0] / float(item_info[1])
    if not sell_orders[0]:
        sell_orders[0] = 0
    if sell_orders[0] and buy_orders[0] and sell_orders[0] is not None and len(sell_orders[0]) != 0:
        if sell_orders[0][0] is not None and sell_orders[0][0]:
            quickbuy_coefficient = buy_orders[0][0] / sell_orders[0][0]
    if sell_orders[0] and sell_orders[0] is not None and len(sell_orders[0]) != 0:
        if len(sell_orders[0]) >= 2:
            if sell_orders[0][0] is not None and sell_orders[0][1] is not None:
                so_ratio = sell_orders[0][0] / sell_orders[0][1]
                if item_info[1] is not None:
                    lowest_so_to_bp_price = sell_orders[0][0] / item_info[1]

    if traders_coefficient >= 0.75 >= so_ratio and lowest_so_to_bp_price < 1: #  quickbuy_coefficient >= 0.75 and
        effect_name = utils.get_key(config.particles_dict, int(particle_effect))
        print(f'{effect_name} {item_name}')
        print(f'url: {classified_url}')
        print(f'bo_count: {buy_orders[1]} | bo_prices: {buy_orders[0]}')
        print(f'so_count: {sell_orders[1]} | so_prices: {sell_orders[0]}')
        print(f'bp price: {item_info[1]}')
        print(f'quickbuy_coefficient: {quickbuy_coefficient}')
        print(f'traders_coefficient: {traders_coefficient}')
        print(f'lowest_so_to_bp_price: {lowest_so_to_bp_price}')
    else:
        print(datetime.datetime.now())
Example #37
def evaluate(encoder, decoder, sentence, source_lang, target_lang):
    out_seq = []
    with torch.no_grad():
        in_tensor = sentence_to_tensor(sentence, source_lang, device)

        ctx_vec, outputs = encode_seq(encoder, in_tensor, device=device)

        input = torch.tensor([[Tokens.SOS.value]]).to(device=device)
        hidden = ctx_vec

        for idx in range(args.max_words):
            output, hidden = decoder(input, hidden, outputs)
            topv, topi = output.topk(1)
            if topi.item() == Tokens.EOS.value:
                break
            else:
                out_seq.append(get_key(target_lang.dict, topi.item()))
            input = topi.squeeze().detach()

    return out_seq
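get_key(target_lang.dict, topi.item()) does the reverse of a normal dict lookup: target_lang.dict presumably maps words to indices, and the decoder's predicted index has to be turned back into a word. A minimal sketch of such a reverse lookup (assumed; it is linear in the vocabulary size, so a real implementation would usually keep an index-to-word dict instead):

def get_key(dictionary, value):
    # Return the first key whose value matches, e.g. the word for an index.
    for key, val in dictionary.items():
        if val == value:
            return key
    return None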
Example #38
    def enter_drive_mode(self):
        if self.connected_to_droid:
            print('\nPreparing for drive mode...\n')
            self.set_stance(1, _print=False)

            self.drive_mode = True
            print('\nControls:\n%s\n' % utils.get_drive_mode_controls_text())
            print('Ready for keyboard input...\n')

            speed, angle = 0, self.angle
            while True:
                key = utils.get_key()
                break_, speed, angle = self.process_key(key, speed, angle)
                if break_:
                    break

            print('Exiting drive mode...\n')
            self.drive_mode = False
            self.set_stance(2, _print=False)
        else:
            print('You must connect to a droid before you can enter drive mode')
Example #39
def get_sbg_keys():
    return get_key('sbgkey')
Example #40
    def cluster(self, input_scores=None, infname=None, debug=False, reco_info=None, outfile=None, plotdir=''):
        if infname is None:
            assert input_scores is not None
        else:
            assert input_scores is None  # should only specify <input_scores> *or* <infname>
            input_scores = []
            with opener('r')(infname) as infile:
                reader = csv.DictReader(infile)
                for line in reader:
                    input_scores.append(line)
        sorted_lines = sorted(input_scores, key=lambda k: float(k['score']))
        for line in sorted_lines:
            a_name = line['id_a']
            b_name = line['id_b']
            score = float(line['score'])
            from_same_event = -1 if (reco_info == None or a_name not in reco_info or b_name not in reco_info) else reco_info[a_name]['reco_id'] == reco_info[b_name]['reco_id']
            dbg_str_list = ['%22s %22s   %8.3f   %d' % (a_name, b_name, score, from_same_event), ]
            self.incorporate_into_clusters(a_name, b_name, score, dbg_str_list)
            self.pairscores[(utils.get_key((a_name, b_name)))] = score
            self.plotscores['all'].append(score)
            if reco_info != None:
                if from_same_event:
                    self.plotscores['same'].append(score)
                else:
                    self.plotscores['diff'].append(score)
            # if reco_info != None and reco_info[a_name]['reco_id'] == reco_info[b_name]['reco_id']:
            #     for query,score in {a_name:score, b_name:score}.iteritems():
            #         if query not in self.nearest_true_mate:
            #             self.nearest_true_mate[query] = score
            #         elif self.greater_than and score > self.nearest_true_mate[query]:
            #             self.nearest_true_mate[query] = score
            #         elif not self.greater_than and score < self.nearest_true_mate[query]:
            #             self.nearest_true_mate[query] = score
            if debug:
                outstr = ''.join(dbg_str_list)
                if outfile == None:
                    print outstr
                else:
                    outfile.write(outstr + '\n')

        if plotdir != '':
            utils.prep_dir(plotdir + '/plots', '*.svg')
            hists = {}
            for htype in ['all', 'same', 'diff']:
                hists[htype] = plotting.make_hist_from_list(self.plotscores[htype], htype + '_pairscores')
                hists[htype].SetTitle(htype)
            plotting.draw(hists['all'], 'float', plotdir=plotdir, plotname='pairscores', more_hists=[hists['same'], hists['diff']])
            check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
            check_call(['./bin/permissify-www', plotdir])

        for query, cluster_id in self.query_clusters.iteritems():
            if cluster_id not in self.id_clusters:
                self.id_clusters[cluster_id] = []
            self.id_clusters[cluster_id].append(query)
        for cluster_id, queries in self.id_clusters.items():
            if len(queries) == 1:
                self.singletons.append(queries[0])

        # print 'nearest',self.nearest_true_mate
        out_str_list = ['  %d clusters:\n'%len(self.id_clusters), ]
        for cluster_id in self.id_clusters:
            out_str_list.append('   ' + ' '.join([str(x) for x in self.id_clusters[cluster_id]]) + '\n')
        if outfile == None:
            print ''.join(out_str_list)
        else:
            outfile.write(''.join(out_str_list))
Example #41
def get_s3_keys():
    return get_key('sbgs3key')
Example #42
def get_access_keys():
    keys = get_key()
    # TODO: store with appropriate server, for now default to testportal
    keys['default']['server'] = 'https://testportal.4dnucleome.org'
    return keys
Example #43
    def read_hmm_output(self, algorithm, hmm_csv_outfname, make_clusters=True, count_parameters=False, parameter_out_dir=None, plotdir=None):
        print '    read output'
        if count_parameters:
            assert parameter_out_dir is not None
            assert plotdir is not None
        pcounter = ParameterCounter(self.germline_seqs) if count_parameters else None
        true_pcounter = ParameterCounter(self.germline_seqs) if (count_parameters and not self.args.is_data) else None
        perfplotter = PerformancePlotter(self.germline_seqs, plotdir + '/hmm/performance', 'hmm') if self.args.plot_performance else None

        n_processed = 0
        hmminfo = []
        with opener('r')(hmm_csv_outfname) as hmm_csv_outfile:
            reader = csv.DictReader(hmm_csv_outfile)
            last_key = None
            boundary_error_queries = []
            for line in reader:
                utils.intify(line, splitargs=('unique_ids', 'seqs'))
                ids = line['unique_ids']
                this_key = utils.get_key(ids)
                same_event = from_same_event(self.args.is_data, True, self.reco_info, ids)
                id_str = ''.join(['%20s ' % i for i in ids])

                # check for errors
                if last_key != this_key:  # if this is the first line for this set of ids (i.e. the best viterbi path or only forward score)
                    if line['errors'] != None and 'boundary' in line['errors'].split(':'):
                        boundary_error_queries.append(':'.join([str(uid) for uid in ids]))
                    else:
                        assert len(line['errors']) == 0

                if algorithm == 'viterbi':
                    line['seq'] = line['seqs'][0]  # add info for the best match as 'seq'
                    line['unique_id'] = ids[0]
                    utils.add_match_info(self.germline_seqs, line, self.cyst_positions, self.tryp_positions, debug=(self.args.debug > 0))

                    if last_key != this_key or self.args.plot_all_best_events:  # if this is the first line (i.e. the best viterbi path) for this query (or query pair), print the true event
                        n_processed += 1
                        if self.args.debug:
                            print '%s   %d' % (id_str, same_event)
                        if line['cdr3_length'] != -1 or not self.args.skip_unproductive:  # if it's productive, or if we're not skipping unproductive rearrangements
                            hmminfo.append(dict([('unique_id', line['unique_ids'][0]), ] + line.items()))
                            if pcounter is not None:  # increment counters (but only for the best [first] match)
                                pcounter.increment(line)
                            if true_pcounter is not None:  # increment true counters
                                true_pcounter.increment(self.reco_info[ids[0]])
                            if perfplotter is not None:
                                perfplotter.evaluate(self.reco_info[ids[0]], line)

                    if self.args.debug:
                        self.print_hmm_output(line, print_true=(last_key != this_key), perfplotter=perfplotter)
                    line['seq'] = None
                    line['unique_id'] = None

                else:  # for forward, write the pair scores to file to be read by the clusterer
                    if not make_clusters:  # self.args.debug or 
                        print '%3d %10.3f    %s' % (same_event, float(line['score']), id_str)
                    if line['score'] == '-nan':
                        print '    WARNING encountered -nan, setting to -999999.0'
                        score = -999999.0
                    else:
                        score = float(line['score'])
                    if len(ids) == 2:
                        hmminfo.append({'id_a':line['unique_ids'][0], 'id_b':line['unique_ids'][1], 'score':score})
                    n_processed += 1

                last_key = utils.get_key(ids)

        if pcounter is not None:
            pcounter.write(parameter_out_dir)
            if not self.args.no_plot:
                pcounter.plot(plotdir, subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
        if true_pcounter is not None:
            true_pcounter.write(parameter_out_dir + '/true')
            if not self.args.no_plot:
                true_pcounter.plot(plotdir + '/true', subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
        if perfplotter is not None:
            perfplotter.plot()

        print '  processed %d queries' % n_processed
        if len(boundary_error_queries) > 0:
            print '    %d boundary errors (%s)' % (len(boundary_error_queries), ', '.join(boundary_error_queries))

        return hmminfo
Example #44
    # Level 0 classifiers
    clfs = [
        ExtraTreesClassifier(**utils.read_estimator_params(s, "et")),
        LogisticRegression(**utils.read_estimator_params(s, "lr")),
        RandomForestClassifier(**utils.read_estimator_params(s, "rf"))
    ]

    # First, run grid search (if enabled) to find the best estimator
    results_1 = []
    for clf in clfs:
        ts = time.time()
        clf_name = type(clf).__name__
        model = utils.find_best_estimator(clf, X_train, y_train, section=s)
        preds = model.predict_proba(X_valid)
        log_loss = metrics.log_loss(y_valid, preds)
        results_1.append((utils.get_key(clf_name), model, log_loss))
        logger.info("Trained {} in {:.2f} seconds, Log loss : {:.6f}"
            .format(clf_name, (time.time() - ts), log_loss))
    # Sort by log_loss
    results_1.sort(key=lambda tup: tup[2])
    logger.info(tabulate(zip([r[0] for r in results_1],
                             [r[2] for r in results_1]),
                         floatfmt=".4f", headers=("model", "log_loss")))
    clfs = [clf[1] for clf in results_1] # required for blending stage

    # Next, run stacked generalization (blending)
    logger.info("Start blending")
    results_2 = []
    for i in xrange(cfg[s]["n_blends"]):
        print("Iteration {}".format(i))
        bclf, b_t, log_loss = run_stacked_generalization(clfs, train, target)