Example #1
    def predict(self, st=0, en=None, indices=None, scaler_key: str = '5', pref: str = 'test',
                use_datetime_index=True, **plot_args):
        """
        scaler_key: if None, the data will not be indexed along date_time index.

        """

        if indices is not None:
            setattr(self, 'predict_indices', indices)

        inputs, true_outputs = self.run_paras(st=st, en=en, indices=indices, scaler_key=scaler_key,
                                              return_dt_index=use_datetime_index)

        first_input = inputs[0]
        dt_index = np.arange(len(first_input))  # default case when datetime_index is not present in input data
        if use_datetime_index:
            # drop the first column of the first input, which holds the datetime index
            if np.ndim(first_input) == 2:
                dt_index = get_index(np.array(first_input[:, 0], dtype=np.int64))
                first_input = first_input[:, 1:]
            else:
                dt_index = get_index(np.array(first_input[:, -1, 0], dtype=np.int64))
                first_input = first_input[:, :, 1:].astype(np.float32)
            inputs[0] = first_input

        predicted = self.k_model.predict(x=inputs,
                                         batch_size=self.data_config['batch_size'],
                                         verbose=1)

        predicted, true_outputs = self.denormalize_data(first_input, predicted, true_outputs, scaler_key)

        if self.quantiles is None:
            if not isinstance(true_outputs, list):
                true_outputs = [true_outputs]
            if not isinstance(predicted, list):
                predicted = [predicted]

            # convert each output in true_outputs and predicted to a pd.Series, sorted by its datetime index
            true_outputs = [pd.Series(t_out.reshape(-1,), index=dt_index).sort_index() for t_out in true_outputs]
            predicted = [pd.Series(p_out.reshape(-1,), index=dt_index).sort_index() for p_out in predicted]

            # save the results
            for idx, out in enumerate(self.out_cols):
                p = predicted[idx]
                t = true_outputs[idx]
                df = pd.concat([t, p], axis=1)
                df.columns = ['true_' + str(out), 'pred_' + str(out)]
                df.to_csv(os.path.join(self.path, pref + '_' + str(out) + ".csv"), index_label='time')

            self.process_results(true_outputs, predicted, pref+'_', **plot_args)

            return true_outputs, predicted

        else:
            assert self.outs == 1
            self.plot_quantiles1(true_outputs, predicted)
            self.plot_quantiles2(true_outputs, predicted)
            self.plot_all_qs(true_outputs, predicted)

            return true_outputs, predicted
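
The snippet above relies on a get_index helper that turns the int64 timestamp column into a sortable datetime index. A minimal sketch of such a helper, assuming the values are Unix epoch seconds (the project's actual implementation is not shown):

import numpy as np
import pandas as pd

def get_index(timestamps: np.ndarray) -> pd.DatetimeIndex:
    # hypothetical: interpret each int64 value as seconds since the Unix epoch
    return pd.to_datetime(timestamps, unit='s')
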
Example #2
def preform_regression(test_num, trials):
    trial = 0
    accuracy = 0
    # Load csv into variable
    data = u.read_csv("final_training_dataset.csv")
    # Get required information and get predictions with regression
    watch_price = u.get_values(data[1:], u.get_index(data, 'price'))
    watch_deal = u.normalize_deal(
        u.get_values(data[1:], u.get_index(data, 'deal_type')))
    # Run exactly `trials` trials
    while trial < trials:
        random_instances = get_random_instances(
            test_num, data[1:])  # Don't include header
        guessed_deals = least_squares_regression(
            watch_price, watch_deal,
            u.get_values(random_instances, u.get_index(data, 'price')), False)
        guessed_deals = u.classify_deal(guessed_deals)
        # Get the actual deal values of the random instances
        actual_deals = u.get_values(random_instances,
                                    u.get_index(data, 'deal_type'))

        for i in range(len(random_instances)):
            if guessed_deals[i] == actual_deals[i]:
                accuracy += 1
        trial += 1
    # total number of correct guesses across all trials
    return accuracy
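
Here u.get_index evidently looks a column name up in the CSV header row. A plausible sketch, assuming the first row of the parsed CSV holds the column names:

def get_index(table, column_name):
    # hypothetical helper: position of column_name in the header row
    return table[0].index(column_name)

u.get_index(data, 'price') would then give the position of the price column in every data row.
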
Example #3
        def _get_one_triplet(input_data, input_labels):
            input_labels = np.array(input_labels)
            index = np.random.choice(n_labels, 2, replace=False)
            label_positive = index[0]
            label_negative = index[1]

            indexes = utils.get_index(input_labels, index[0])
            np.random.shuffle(indexes)
            data_anchor = input_data[indexes[0], :, :, :]
            data_anchor = utils.prewhiten(data_anchor)
            data_anchor = utils.flip(data_anchor, random_flip=True)
            data_anchor = utils.random_crop(data_anchor, image_size=299)
            data_anchor = utils.random_rotate_image(data_anchor)

            data_positive = input_data[indexes[1], :, :, :]
            data_positive = utils.prewhiten(data_positive)
            data_positive = utils.flip(data_positive, random_flip=True)
            data_positive = utils.random_crop(data_positive, image_size=299)
            data_positive = utils.random_rotate_image(data_positive)

            indexes = utils.get_index(input_labels, index[1])
            np.random.shuffle(indexes)
            data_negative = input_data[indexes[0], :, :, :]
            data_negative = utils.prewhiten(data_negative)
            data_negative = utils.flip(data_negative, random_flip=True)
            data_negative = utils.random_crop(data_negative, image_size=299)
            data_negative = utils.random_rotate_image(data_negative)

            # the anchor and positive samples share label_positive
            return data_anchor, data_positive, data_negative, \
                   label_positive, label_positive, label_negative
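
utils.get_index(input_labels, label) must return all sample positions carrying the given label, since the result is shuffled and then indexed. A sketch under that assumption:

import numpy as np

def get_index(labels, value):
    # hypothetical helper: indices of every sample whose label equals value
    return np.where(labels == value)[0]
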
Example #4
    def __getitem__(self, index):
        # TODO: Properly implement band dimension
        if len(index) == 2:

            corr_lat_start = filter_coord(index[0].start, self.y_dim)
            corr_lat_end = filter_coord(index[0].stop, self.y_dim)
            corr_lon_start = filter_coord(index[1].start, self.x_dim)
            corr_lon_end = filter_coord(index[1].stop, self.x_dim)

            lat1 = get_index(corr_lat_start, self.y_dim)
            lat2 = get_index(corr_lat_end, self.y_dim) + 1
            lon1 = get_index(corr_lon_start, self.x_dim)
            lon2 = get_index(corr_lon_end, self.x_dim) + 1

            if self.array is None:
                return Tile3(origin_id=self.origin_id, bands=6, lat_start=corr_lat_start, lat_end=corr_lat_end,
                             lon_start=corr_lon_start, lon_end=corr_lon_end, array=None, lazy=True)

            else:
                return Tile3(origin_id=self.origin_id, bands=6, lat_start=corr_lat_start, lat_end=corr_lat_end,
                             lon_start=corr_lon_start, lon_end=corr_lon_end, array=self.array[lon1:lon2, lat1:lat2], lazy=False)

        else:
            # TODO: Properly manage index exceptions
            raise IndexError("Tile3 indexing expects a (lat, lon) slice pair")
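
filter_coord and get_index are defined elsewhere; from the usage, get_index(coord, dim) maps a coordinate to its position along a dimension vector. A minimal nearest-neighbour sketch, assuming dim is a 1-D array of coordinates:

import numpy as np

def get_index(coord, dim):
    # hypothetical helper: index of the dimension entry closest to coord
    return int(np.abs(dim - coord).argmin())
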
Example #5
 def __init__(self):
     index_entpath = "indexes/entity_2M.pkl"
     index_reachpath = "indexes/reachability_2M.pkl"
     index_namespath = "indexes/names_2M.pkl"
     # FIXME: store the Freebase graph with the name field
     fb_path = "indexes/fb_graph.pkl"
     self.index_ent = get_index(index_entpath)
     self.index_names = get_index(index_namespath)
     self.index_reach = get_index(index_reachpath)
     self.fb_graph = get_index(fb_path)
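
In this constructor get_index acts as a generic loader for the pickled index files. A sketch of such a loader, assuming plain pickle files on disk:

import pickle

def get_index(path):
    # hypothetical helper: deserialize a pickled index from disk
    with open(path, 'rb') as f:
        return pickle.load(f)
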
Example #6
def scatter_plot(myList, attributeX, attributeY, filename, title, xLabel,
                 yLabel):
    plt.figure(figsize=(20, 7))
    data = myList[1:]
    x = u.get_values(data, u.get_index(myList, attributeX))
    y = u.get_values(data, u.get_index(myList, attributeY))
    plt.scatter(x, y, marker='.')
    label_plt(title, xLabel, yLabel)

    plt.savefig(filename)
    plt.close()
Example #7
    def __init__(self, origin_id=None, bands=None, lat_start=None, lat_end=None,
                 lon_start=None, lon_end=None, array=None, lazy=True, f_pointer=None):

        self.origin_id = origin_id
        # TODO Hardcoded satellite (Should come with origin_id)
        self.origin_id["satellite"] = "LS5_TM"
        
        orig_y_dim = get_geo_dim(origin_id["lat_start"], origin_id["lat_extent"], origin_id["pixel_size"])
        orig_x_dim = get_geo_dim(origin_id["lon_start"], origin_id["lon_extent"], origin_id["pixel_size"])

        corr_lat_start = filter_coord(lat_start, orig_y_dim)
        corr_lon_start = filter_coord(lon_start, orig_x_dim)

        corr_lat_end = filter_coord(lat_end, orig_y_dim)
        corr_lon_end = filter_coord(lon_end, orig_x_dim)

        lat1 = get_index(corr_lat_start, orig_y_dim)
        lat2 = get_index(corr_lat_end, orig_y_dim) + 1
        lon1 = get_index(corr_lon_start, orig_x_dim) 
        lon2 = get_index(corr_lon_end, orig_x_dim) + 1

        self.y_dim = get_geo_dim(corr_lat_start, corr_lat_end-corr_lat_start+self.origin_id["pixel_size"],
                                 self.origin_id["pixel_size"])
        self.x_dim = get_geo_dim(corr_lon_start, corr_lon_end-corr_lon_start+self.origin_id["pixel_size"],
                                 self.origin_id["pixel_size"])

        self.bands = bands
        if isinstance(bands, int):
            self.band_dim = np.arange(1) + 1
        elif isinstance(bands, list):
            self.band_dim = np.arange(len(bands)) + 1
        else:
            raise TypeError("bands must be an int or a list of band indices")

        self.array = None

        if not lazy:
            file_path = DATA_PATH + "{0}_{1:03d}_{2:04d}_{3}.nc".format(self.origin_id["satellite"],
                                                                        int(self.origin_id[u'lon_start']),
                                                                        int(self.origin_id[u'lat_start']),
                                                                        self.origin_id[u'time'].year)
            print(file_path)
            with h5py.File(file_path, 'r') as dfile:
                self.array = dfile[self.origin_id["product"]][self.timestamp][lat1:lat2, lon1:lon2, bands]

        if f_pointer is not None:
            self.array = f_pointer[self.origin_id["product"]][self.timestamp][lat1:lat2, lon1:lon2, bands]
Example #8
def get_threshold(tok, cp_word, date_figures):
    parse = next(parser.parse(tok))  # first, parse the whole clause

    # Then we search the grammatical context of cp_word.
    # This is most of the time a Prepositional Phrase (PP), a Nominal Phrase (NP)
    # or a Quantifier Phrase (QP)

    pp = None
    sub = parse.subtrees()
    for s in sub:
        if s.label() == "PP" and s.leaves()[0] == cp_word:
            pp = s
    if pp is None:
        pps = get_subtrees(parse, "PP")
        for p in pps:
            if cp_word in p.leaves():
                pp = p
    if pp is None:
        nps = get_subtrees(parse, "NP")
        for n in nps:
            if cp_word in n.leaves():
                pp = n
    if pp is None:
        qps = get_subtrees(parse, "QP")
        for q in qps:
            if cp_word in q.leaves():
                pp = q

    # If a context is found, look for the first number appearing after cp_word
    # that is not a date
    if pp is not None:
        i = get_index(pp.leaves(), cp_word)  # position of the comparison word in the context
        fig = get_nodes(pp, "CD")  # list of all numbers appearing in the context
        n = 0
        for f in fig:
            if (n == 0 and get_index(pp.leaves(), f) > i
                    and (f not in date_figures)):
                n = f

        # and if such a number exists, check whether a unit multiplier is
        # written just after it
        if n != 0:
            k = get_index(tok, n)  # position of the number in the clause
            mult = 1
            try:
                mult = unit_m[tok[k + 1].lower()]
            except (KeyError, IndexError):  # no recognised multiplier follows
                pass
            return float(n) * mult

    return None
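
get_index(leaves, word) is only asked for the position of a token in a list here, so it may be little more than list.index. A sketch:

def get_index(tokens, item):
    # hypothetical helper: position of the first occurrence of item
    return tokens.index(item)
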
Example #9
 def gen_wordid_list(self, word_map):
     word_ids = []
     word_lengths = []
     for word in self.get_word_list():
         word_ids.append(utils.get_index(word, word_map))
         word_lengths.append(len(word))
     return word_ids, word_lengths
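
utils.get_index(word, word_map) presumably maps a token to its vocabulary id. A sketch with an out-of-vocabulary fallback; the unk_id default is an assumption:

def get_index(token, token_map, unk_id=0):
    # hypothetical helper: vocabulary id of token, or unk_id if unseen
    return token_map.get(token, unk_id)
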
Example #10
    def predict(self, st=0, en=None, indices=None, scaler_key: str = '5', pref: str = 'test',
                use_datetime_index=True, **plot_args):
        out_cols = self.out_cols
        for idx, out in enumerate(out_cols):

            self.out_cols = [self.data_config['outputs'][idx]]  # because fetch_data depends upon self.outs
            inputs, true_outputs = self.test_paras(st=st, en=en, indices=indices, scaler_key=scaler_key,
                                                  return_dt_index=use_datetime_index, data=self.data[idx])
            self.out_cols = self.data_config['outputs']  # setting the actual output columns back to original

            first_input = inputs[0]
            dt_index = np.arange(len(first_input))  # default case when datetime_index is not present in input data
            if use_datetime_index:
                # drop the first column of the first input, which holds the datetime index
                dt_index = get_index(np.array(first_input[:, -1, 0], dtype=np.int64))
                first_input = first_input[:, :, 1:].astype(np.float32)
                inputs[0] = first_input

            predicted = self.k_model.predict(x=inputs,
                                             batch_size=self.data_config['batch_size'],
                                             verbose=1)

            predicted, true_outputs = self.denormalize_data(first_input, predicted, true_outputs, scaler_key)

            true_outputs = pd.Series(true_outputs.reshape(-1,), index=dt_index).sort_index()
            predicted = pd.Series(predicted.reshape(-1,), index=dt_index).sort_index()

            df = pd.concat([true_outputs, predicted], axis=1)
            df.columns = ['true_' + str(out), 'pred_' + str(out)]
            df.to_csv(os.path.join(self.path, pref + '_' + str(out) + ".csv"), index_label='time')

            self.out_cols = [out]
            self.process_results([true_outputs], [predicted], pref + '_', **plot_args)

        # note: only the series for the last output column are returned
        return predicted, true_outputs
Example #11
def serp():
    query = request.args.get('q', '')
    field = request.args.get('field', 'abstract')
    page = request.args.get('page', '1')
    _size = request.args.get('size', '50')
    deps = utils.get_deps(configs)
    page = int(page)
    _size = int(_size)
    print(query)
    print(deps)
    res = search_phrase(query, field=field, deps=deps, 
                        index=utils.get_index(configs), 
                        _from=(page-1)*_size, 
                        _size=_size,
                        subtree=configs['parser']['subtree'])
    records = []
    for record in res['records']:
        record['sentence'] = record['sentence'].replace('<em>', '<span class="serp__match">').replace('</em>','</span>')
        records.append(record)
    no_records = len(records)
    pagination = Pagination(page=page, per_page=_size, total=res['no_hits'],
                                css_framework='bootstrap3')
    return render_template('serp.html', 
                           query=query,
                           records=records,
                           no_records=no_records,
                           no_hits=res['no_hits'],
                           se_time=res['se_time'],
                           nlp_time=res['nlp_time'],
                           no_papers=res['no_papers'],
                           page=page,
                           size=_size,
                           pagination=pagination,
                           settings=configs)
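
utils.get_index(configs) appears to read the Elasticsearch index name out of the app configuration. A sketch; the config keys are purely hypothetical:

def get_index(configs):
    # hypothetical helper: index name from the parsed configuration mapping
    return configs['elasticsearch']['index']
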
Example #12
File: Api.py Project: keat01/pyLoad
    def __new__(cls, f, *args, **kwargs):
        fc = f.func_code

        try:
            i = get_index(fc.co_names, "user")
        except ValueError:  # the function does not use "user", so no need to modify it
            return f

        user_context[f.__name__] = True
        new_names = tuple([x for x in fc.co_names if x != "user"])
        new_varnames = tuple([x for x in fc.co_varnames] + ["user"])
        new_code = fc.co_code

        # subtract 1 from higher LOAD_GLOBAL
        for x in range(i + 1, len(fc.co_names)):
            new_code = new_code.replace(chr(opmap['LOAD_GLOBAL']) + chr(x), chr(opmap['LOAD_GLOBAL']) + chr(x - 1))

        # load argument instead of global
        new_code = new_code.replace(chr(opmap['LOAD_GLOBAL']) + chr(i), chr(opmap['LOAD_FAST']) + chr(fc.co_argcount))

        new_fc = code(fc.co_argcount + 1, fc.co_nlocals + 1, fc.co_stacksize, fc.co_flags, new_code, fc.co_consts,
            new_names, new_varnames, fc.co_filename, fc.co_name, fc.co_firstlineno, fc.co_lnotab, fc.co_freevars,
            fc.co_cellvars)

        f.func_code = new_fc

        # None as default argument for user
        if f.func_defaults:
            f.func_defaults = tuple([x for x in f.func_defaults] + [None])
        else:
            f.func_defaults = (None,)

        return f
Example #13
def create_ranking(filename, data, _ids=None, top_k=5, file_type='xlsx'):

    if _ids is None:
        gs = get_all_geneset()
    else:
        gs = _ids

    ind = list(range(0, data.shape[0]))
    gensets = get_geneset(ind)

    rankings = []
    rankings_to_df = []
    cols = ['Node', 'Similar', 'rank-distance']

    for i in gs:
        index = get_index(i)
        point_embeddings = [data[index]]
        pr = point_ranking(point_embeddings, data, ind, gensets, top_k)

        rankings.append(pr)
        for k in pr:
            zone = [i, k[1], k[2]]
            rankings_to_df.append(zone)

    rank_df = pd.DataFrame(rankings_to_df, columns=cols)
    rank_df = rank_df.set_index(['Node', 'Similar'], inplace=False)

    if file_type == 'xlsx':
        writer = pd.ExcelWriter(filename)
        rank_df.to_excel(writer)
        writer.save()
    elif file_type == 'csv':
        rank_df.to_csv(filename, sep='\t')
Example #14
 def gen_char_list(self, char_map):
     sent_char_lists = []
     word_list = self.get_word_list()
     for w in word_list:
         char_list = []
         for c in w:
             char_list.append(utils.get_index(c, char_map))
         sent_char_lists.append(char_list)
     return sent_char_lists
Example #15
 def init_from_xml(self, node):
     self.type = node.tag
     self.attrib = node.attrib
     self.indexs = get_index(node)
     for cnode in node:
         if cnode.tag == "indexlist":
             continue
         subnode = DRSnode()
         subnode.init_from_xml(cnode)
         self.expression.append(subnode)
Example #16
        def _get_one_triplet(input_data, input_labels):
            input_labels = np.array(input_labels)
            index = np.random.choice(n_labels, 2, replace=False)
            label_positive = index[0]
            label_negative = index[1]

            indexes = utils.get_index(input_labels, index[0])
            np.random.shuffle(indexes)
            data_anchor = input_data[indexes[0], :, :, :]
            data_positive = input_data[indexes[1], :, :, :]

            indexes = utils.get_index(input_labels, index[1])
            np.random.shuffle(indexes)
            data_negative = input_data[indexes[0], :, :, :]

            return data_anchor, data_positive, data_negative, \
                   label_positive, label_positive, label_negative
Example #17
 def init_from_xml(self, node):
     self.type = node.tag
     self.text = node.text.strip()
     self.attrib = node.attrib
     self.indexs = get_index(node)
     for cnode in node:
         if cnode.tag in ["indexlist", "tokens", "taggedtokens"]:
             continue
         subnode = DRSnode()
         subnode.init_from_xml(cnode)
         self.expression.append(subnode)
Example #18
def histogram(myList, attribute, filename, title, xlabel, ylabel):
    plt.figure(figsize=(20, 7))
    data = myList[1:]
    col_index = u.get_index(myList, attribute)
    x, y = u.get_frequencies(data, col_index)
    plt.bar(x, y)
    plt.title(title.title())
    plt.xlabel(xlabel.title())
    plt.ylabel(ylabel.title())
    # Save graph and close figure
    plt.savefig(filename)
    plt.close()
Example #19
        def read_batch_input(input_data, input_labels):

            input_labels = np.array(input_labels)
            index = np.random.choice(n_labels, 2, replace=False)
            label_positive = index[0]

            indexes = utils.get_index(input_labels, index[0])
            np.random.shuffle(indexes)
            data_anchor = input_data[indexes[0]]
            data_anchor = _read_image(data_anchor)

            return data_anchor, label_positive
Example #20
def get_sector_tickers_data():
    kospi, kosdaq = get_index()
    markets = kospi + kosdaq
    stocks = []

    for idx in markets:
        stocks.append(stock.get_index_portfolio_deposit_file(idx))
        time.sleep(0.5)

    df = pd.DataFrame({'index': markets, 'stocks': stocks})

    make_csv(df, 'sector_tickers_')
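
The bare get_index() call must return the KOSPI and KOSDAQ sector-index tickers. A sketch of what it might do, assuming the pykrx stock module that the example already uses; the real helper is not shown:

from pykrx import stock

def get_index():
    # hypothetical helper: sector-index tickers for both Korean markets
    kospi = stock.get_index_ticker_list(market="KOSPI")
    kosdaq = stock.get_index_ticker_list(market="KOSDAQ")
    return kospi, kosdaq
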
Example #21
def get_tesserae(sources=None,
                 products=None,
                 t1=None,
                 t2=None,
                 x1=None,
                 x2=None,
                 y1=None,
                 y2=None,
                 bands=None):

    tesserae = []

    for source in sources:

        index = IndexFactory(source)

        for product in products:

            file_names = index.get_files(product, t1, t2, x1, x2, y1, y2)

            for file_name in file_names:

                if os.path.isfile(file_name):
                    tessera = Tessera(source=source, product=product)

                    with h5py.File(file_name, 'r') as hfile:

                        time_dim = hfile[product].dims[0][0].value
                        x_dim = hfile[product].dims[1][0].value
                        y_dim = hfile[product].dims[2][0].value

                        if len(hfile[product].shape) == 3:
                            band_dim = np.arange(1)
                        elif len(hfile[product].shape) == 4:
                            band_dim = hfile[product].dims[3][0].value
                        else:
                            raise ValueError("expected a 3D or 4D dataset")

                        t1_i = get_index(time.mktime(t1.timetuple()), time_dim)
                        t2_i = get_index(time.mktime(t2.timetuple()), time_dim)
                        tessera.t_dim = time_dim[t1_i:t2_i]

                        x1_i = get_index(x1, x_dim)
                        x2_i = get_index(x2, x_dim)
                        tessera.x_dim = x_dim[x1_i:x2_i]

                        #TODO File names are top-left -> bottom-left
                        y1_i = y_dim.shape[0] - get_index(y1, y_dim)
                        y2_i = y_dim.shape[0] - get_index(y2, y_dim)
                        #TODO Change order of vars
                        tessera.y_dim = y_dim[y2_i:y1_i]

                        #TODO Select bands from input parameters
                        tessera.b_dim = band_dim

                        #TODO Change order of vars
                        tessera.array = hfile[product][t1_i:t2_i, y2_i:y1_i,
                                                       x1_i:x2_i]

                    tesserae.append(tessera)

    return tesserae
Example #22
def create_ranking_(filename, data, _ids=None, top_k=5):

    dic = {}
    ind = list(range(0, data.shape[0]))
    gensets = get_geneset(ind)
    if _ids is None:
        _ids = get_all_geneset()

    for geneset in _ids:
        index = get_index(geneset)
        point_embeddings = [data[index]]
        pr = point_ranking(point_embeddings, data, ind, gensets, top_k)
        dic[geneset] = [i[1] for i in pr if i[1] != geneset][:4]

    write_file(filename, dic)
Example #23
def _save_durations_csv(input_path, durs_path, data_kind):
    it_multi = pd.MultiIndex.from_product([list(range(1, 134)), ['a', 'b']],
                                          names=['patient', 'trial'])
    df_durs = pd.DataFrame(index=it_multi, columns=['duration_s', 'sfreq'])
    for file in files_builder(data_kind):
        file_path = os.path.join(input_path, file.name)
        index = get_index(file_path)
        trial = get_trial(file_path)
        df_durs.loc[(index, trial),
                    'duration_s'] = get_duration(file_path, file.df)
        df_durs.loc[(index, trial),
                    'sfreq'] = float(get_sampling_frequency(file_path))
    # note: despite the function name, the frame is pickled rather than written as CSV
    df_durs.to_pickle(durs_path)
    return df_durs
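
get_index(file_path) and get_trial(file_path) evidently parse the patient number and trial letter out of the file name. A sketch for get_index under an assumed naming scheme such as '17a.csv'; the real scheme is not shown:

import os
import re

def get_index(file_path):
    # hypothetical helper: leading patient number in the file name
    match = re.match(r'(\d+)', os.path.basename(file_path))
    return int(match.group(1))
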
Example #24
        def _get_one_triplet(input_data, input_labels):
            input_labels = np.array(input_labels)
            index = np.random.choice(n_labels, 2, replace=False)
            label_positive = index[0]
            label_negative = index[1]

            indexes = utils.get_index(input_labels, index[0])
            np.random.shuffle(indexes)
            data_anchor = input_data[indexes[0]]
            data_anchor = _read_image(data_anchor)

            data_positive = input_data[indexes[1]]
            data_positive = _read_image(data_positive)

            indexes1 = utils.get_index(input_labels, index[1])
            np.random.shuffle(indexes1)
            data_negative = input_data[indexes1[0]]
            data_negative = _read_image(data_negative)

            return data_anchor, data_positive, data_negative, label_positive, label_positive, label_negative
Example #25
    def __getitem__(self, index):
        # TODO: Properly implement band dimension
        if len(index) == 2:

            corr_lat_start = filter_coord(index[0].start, self.y_dim)
            corr_lat_end = filter_coord(index[0].stop, self.y_dim)
            corr_lon_start = filter_coord(index[1].start, self.x_dim)
            corr_lon_end = filter_coord(index[1].stop, self.x_dim)

            lat1 = get_index(corr_lat_start, self.y_dim)
            lat2 = get_index(corr_lat_end, self.y_dim) + 1
            lon1 = get_index(corr_lon_start, self.x_dim)
            lon2 = get_index(corr_lon_end, self.x_dim) + 1

            if self.array is None:
                return Tile3(origin_id=self.origin_id,
                             bands=6,
                             lat_start=corr_lat_start,
                             lat_end=corr_lat_end,
                             lon_start=corr_lon_start,
                             lon_end=corr_lon_end,
                             array=None,
                             lazy=True)

            else:
                return Tile3(origin_id=self.origin_id,
                             bands=6,
                             lat_start=corr_lat_start,
                             lat_end=corr_lat_end,
                             lon_start=corr_lon_start,
                             lon_end=corr_lon_end,
                             array=self.array[lon1:lon2, lat1:lat2],
                             lazy=False)

        else:
            # TODO: Properly manage index exceptions
            raise IndexError("Tile3 indexing expects a (lat, lon) slice pair")
Example #26
def get_sector_index_data():
    kospi, kosdaq = get_index()
    
    kospi_df = pd.DataFrame({
            'index': kospi,
            'name': get_name(kospi)
        })

    kosdaq_df = pd.DataFrame({
            'index': kosdaq,
            'name': get_name(kosdaq)
        })

    df = pd.concat([kospi_df, kosdaq_df])
    make_csv(df, 'sector_index_')
Example #27
def elasticsearch():
    is_redirect = request.args.get('redirect', '0')
    print(is_redirect)
    if is_redirect == '1':
        url = request.referrer
        url = url.replace('/search', '/elasticsearch').replace('is_redirect=1', '')
        return redirect(url)

    query = request.args.get('q', '')
    field = request.args.get('field', 'abstract')
    page = request.args.get('page', '1')
    _size = request.args.get('size', '50')
    page = int(page)
    _size = int(_size)
    res = query_database(field, query, index=utils.get_index(configs), _size=_size, _from=(page-1)*_size)
    return jsonify(res)
Example #28
def get_tesserae(sources=None, products=None, t1=None, t2=None, x1=None, x2=None, y1=None, y2=None, bands=None):

    tesserae = []

    for source in sources:

        index = IndexFactory(source)

        for product in products:

            file_names = index.get_files(product, t1, t2, x1, x2, y1, y2)

            for file_name in file_names:

                if os.path.isfile(file_name):
                    tessera = Tessera(source=source, product=product)

                    with h5py.File(file_name, 'r') as hfile:

                        time_dim = hfile[product].dims[0][0].value
                        x_dim = hfile[product].dims[1][0].value
                        y_dim = hfile[product].dims[2][0].value

                        if len(hfile[product].shape) == 3:
                            band_dim = np.arange(1)
                        elif len(hfile[product].shape) == 4:
                            band_dim = hfile[product].dims[3][0].value
                        else:
                            raise ValueError("expected a 3D or 4D dataset")
                        
                        t1_i = get_index(time.mktime(t1.timetuple()), time_dim)
                        t2_i = get_index(time.mktime(t2.timetuple()), time_dim)
                        tessera.t_dim = time_dim[t1_i:t2_i]

                        x1_i = get_index(x1, x_dim)
                        x2_i = get_index(x2, x_dim)
                        tessera.x_dim = x_dim[x1_i:x2_i]

                        #TODO File names are top-left -> bottom-left
                        y1_i = y_dim.shape[0] - get_index(y1, y_dim)
                        y2_i = y_dim.shape[0] - get_index(y2, y_dim)
                        #TODO Change order of vars
                        tessera.y_dim = y_dim[y2_i:y1_i]

                        #TODO Select bands from input parameters
                        tessera.b_dim = band_dim

                        #TODO Change order of vars
                        tessera.array = hfile[product][t1_i:t2_i, y2_i:y1_i, x1_i:x2_i]

                    tesserae.append(tessera)

    return tesserae
Example #29
 def __init__(self, node):
     self.type = node.tag
     self.indexs = get_index(node)
     self.expression = []
     assert len(node) == 3
     for subnode in node[1:]:
         if subnode.tag == "drs":
             self.expression.append(drs(subnode))
         elif subnode.tag == "app":
             self.expression.append(app(subnode))
         elif subnode.tag == "merge":
             self.expression.append(merge(subnode))
         elif subnode.tag == "alfa":
             self.expression.append(alfa(subnode))
         elif subnode.tag == "sdrs":
             self.expression.append(sdrs(subnode))
         else:
             print(subnode.tag)
             assert False, "unrecognized node"
Example #30
    def parse(self, assign_node):
        if len(assign_node.targets) > 1:
            return False

        if u.is_constant_definition(assign_node):
            return None

        self.name = assign_node.targets[0].id

        rhs = assign_node.value
        if isinstance(rhs, ast.Call):
            call_node = u.cast(rhs, ast.Call)
            self.parse_call(call_node)
            self.array_size = None

        elif isinstance(rhs, ast.Subscript):
            subscript_node = u.cast(rhs, ast.Subscript)
            call_node = u.cast(subscript_node.value, ast.Call)
            self.parse_call(call_node)
            self.array_size = u.get_index(subscript_node)
Example #31
def _parse_opm_info(page):

    # check if there were no matches
    no_matches = re.findall(r'<h2>Search Results for ".*"</h2>No matches',
                            page)
    if no_matches:
        return None

    # check if this page only points to a representative structure
    rep = re.findall(
        r'Representative structure\(s\) of this protein: <br /> '
        r'<a href="protein\.php\?pdbid=([0-9a-zA-Z]{4})">', page)
    if rep:
        return {"representative": rep[0].upper()}

    opm_type = re.findall(r'<li><i>Type:</i> <a.*>(.*)</a>', page)
    opm_class = re.findall(r'<li><i>Class:</i> <a.*>(.*)</a>', page)
    opm_superfamily = re.findall(
        r'<li><i>Superfamily:</i> <a[^<]*>([^<]*)</a>', page)
    opm_family = re.findall(r'<li><i>Family:</i> <a[^<]*>([^<]*)</a>', page)
    opm_species = re.findall(r'<li><i>Species:</i> <i><a.*>(.*)</a></i>', page)
    opm_localization = re.findall(r'<li><i>Localization:</i> <a.*>(.*)</a>',
                                  page)

    related_ids = re.findall(r'"\?extrapdb=([0-9a-zA-Z]{4})"', page)
    related_ids = [x.upper() for x in related_ids]
    related_ids.sort()

    delta_g = re.findall(r'([-+]?[0-9]*\.?[0-9]+) kcal/mol', page)

    return {
        "type": opm_type[0].split(" ", 1)[1],
        "class": opm_class[0].split(" ", 1)[1],
        "superfamily": opm_superfamily[0].split(" ", 1)[1],
        "family": opm_family[0].split(" ", 1)[1],
        "species": opm_species[0].strip(),
        "localization": opm_localization[0],
        "related_ids": related_ids,
        "delta_g": try_float(get_index(delta_g, 0))
    }
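
get_index(delta_g, 0) is called where the regex may have matched nothing, so it is presumably a safe positional lookup. A sketch:

def get_index(seq, i, default=None):
    # hypothetical helper: seq[i], or default when i is out of range
    try:
        return seq[i]
    except IndexError:
        return default
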
Example #32
File: Api.py Project: keat01/pyLoad
    def __new__(cls, f, *args, **kwargs):
        fc = f.func_code

        try:
            i = get_index(fc.co_names, "user")
        except ValueError:  # the function does not use "user", so no need to modify it
            return f

        user_context[f.__name__] = True
        new_names = tuple([x for x in fc.co_names if x != "user"])
        new_varnames = tuple([x for x in fc.co_varnames] + ["user"])
        new_code = fc.co_code

        # subtract 1 from higher LOAD_GLOBAL
        for x in range(i + 1, len(fc.co_names)):
            new_code = new_code.replace(
                chr(opmap['LOAD_GLOBAL']) + chr(x),
                chr(opmap['LOAD_GLOBAL']) + chr(x - 1))

        # load argument instead of global
        new_code = new_code.replace(
            chr(opmap['LOAD_GLOBAL']) + chr(i),
            chr(opmap['LOAD_FAST']) + chr(fc.co_argcount))

        new_fc = code(fc.co_argcount + 1, fc.co_nlocals + 1, fc.co_stacksize,
                      fc.co_flags, new_code, fc.co_consts, new_names,
                      new_varnames, fc.co_filename, fc.co_name,
                      fc.co_firstlineno, fc.co_lnotab, fc.co_freevars,
                      fc.co_cellvars)

        f.func_code = new_fc

        # None as default argument for user
        if f.func_defaults:
            f.func_defaults = tuple([x for x in f.func_defaults] + [None])
        else:
            f.func_defaults = (None, )

        return f
Example #33
    def set_tiles(self, x, y, width, height, data):
        min_index = self.create_tile_index(x, y)
        max_index = self.create_tile_index(x + width - 1, y + height - 1)

        current_x = 0
        current_y = 0
        for ychunk in range(min_index.ychunk, max_index.ychunk + 1):
            base_y = current_y
            current_x = 0
            for xchunk in range(min_index.xchunk, max_index.xchunk + 1):
                xoffset = 0
                yoffset = 0
                xend = self.xtiles_per_chunk - 1
                yend = self.ytiles_per_chunk - 1
                if xchunk == min_index.xchunk:
                    xoffset = min_index.xoffset
                if xchunk == max_index.xchunk:
                    xend = max_index.xoffset
                if ychunk == min_index.ychunk:
                    yoffset = min_index.yoffset
                if ychunk == max_index.ychunk:
                    yend = max_index.yoffset
                chunk_data = self.load_chunk(xchunk, ychunk)

                base_x = current_x
                row_index = 0
                for row in range(yoffset, yend + 1):
                    index = utils.get_index(xoffset, row,
                                            self.xtiles_per_chunk)
                    row_width = xend - xoffset + 1

                    data_index = base_x + (base_y + row_index) * width

                    chunk_data[index:index + row_width] = \
                        data[data_index:data_index + row_width]
                    current_x = row_width + base_x
                    row_index += 1
                current_y = base_y + row_index
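
Here utils.get_index(x, y, width) flattens a 2-D tile coordinate into a 1-D offset within a chunk; the same helper appears in the companion get_tiles method in the next example. A row-major sketch:

def get_index(x, y, width):
    # hypothetical helper: row-major offset of (x, y) in a width-wide grid
    return y * width + x
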
Example #34
    def get_tiles(self, x, y, width, height):
        min_index = self.create_tile_index(x, y)
        max_index = self.create_tile_index(x + width - 1, y + height - 1)

        result = np.zeros((width, height), dtype=utils.TILE_DTYPE)
        current_x = 0
        current_y = 0
        for ychunk in range(min_index.ychunk, max_index.ychunk + 1):
            base_y = current_y
            current_x = 0
            for xchunk in range(min_index.xchunk, max_index.xchunk + 1):
                xoffset = 0
                yoffset = 0
                xend = self.xtiles_per_chunk - 1
                yend = self.ytiles_per_chunk - 1
                if xchunk == min_index.xchunk:
                    xoffset = min_index.xoffset
                if xchunk == max_index.xchunk:
                    xend = max_index.xoffset
                if ychunk == min_index.ychunk:
                    yoffset = min_index.yoffset
                if ychunk == max_index.ychunk:
                    yend = max_index.yoffset
                data = self.load_chunk(xchunk, ychunk)

                base_x = current_x
                row_index = 0
                for row in range(yoffset, yend + 1):
                    index = utils.get_index(xoffset, row,
                                            self.xtiles_per_chunk)
                    row_width = xend - xoffset + 1
                    row_data = data[index:index + row_width]
                    result[base_x:base_x + row_width,
                           base_y + row_index] = row_data
                    current_x = row_width + base_x
                    row_index += 1
                current_y = base_y + row_index
        return result.T