Example #1
 def plr_sv_feature(name):
     fst_name = f"fst_{name}"
     snd_name = f"snd_{name}"
     if fst_name in dct and snd_name in dct:
         fst_val = rl.SizedValue(*dct[fst_name])
         snd_val = rl.SizedValue(*dct[snd_name])
         self.features.append(
             feature.Feature(name=fst_name, value=fst_val, flip_value=snd_val)
         )
         self.features.append(
             feature.Feature(name=snd_name, value=snd_val, flip_value=fst_val)
         )
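The fst_/snd_ pairing above builds two mirrored features, each carrying the other side's value as its flip_value. Below is a minimal self-contained sketch of that idea; the namedtuples stand in for the real feature.Feature and rl.SizedValue types, and the dct contents are hypothetical:

from collections import namedtuple

# Stand-ins for the real feature.Feature and rl.SizedValue types.
Feature = namedtuple("Feature", "name value flip_value")
SizedValue = namedtuple("SizedValue", "value size")

dct = {"fst_elo": (1500.0, 30), "snd_elo": (1480.0, 25)}  # hypothetical input

features = []
for name in ("elo",):
    fst_name, snd_name = f"fst_{name}", f"snd_{name}"
    if fst_name in dct and snd_name in dct:
        fst_val = SizedValue(*dct[fst_name])
        snd_val = SizedValue(*dct[snd_name])
        # Each side's flip_value is the opposite side's value.
        features.append(Feature(fst_name, fst_val, snd_val))
        features.append(Feature(snd_name, snd_val, fst_val))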
Example #2
    def __init__(self, args, opt_column=None):
        """Constructs a collection features.

    Args:
      args: constructor argument.  One of:
          1) A string - assumed to be the name of a collection.
          2) A number - assumed to be the ID of a Fusion Table.
          3) A geometry.
          4) A feature.
          5) An array of features.
          6) A computed object - reinterpreted as a collection.
      opt_column: The name of the geometry column to use. Only useful with the
          string or number constructor arguments.

    Raises:
      EEException: if passed something other than the above.
    """
        self.initialize()

        # Wrap geometries with features.
        if isinstance(args, geometry.Geometry):
            args = feature.Feature(args)

        # Wrap single features in an array.
        if isinstance(args, feature.Feature):
            args = [args]

        if ee_types.isNumber(args) or ee_types.isString(args):
            # An ID.
            actual_args = {'tableId': args}
            if opt_column:
                actual_args['geometryColumn'] = opt_column
            super(FeatureCollection, self).__init__(
                apifunction.ApiFunction.lookup('Collection.loadTable'),
                actual_args)
        elif isinstance(args, (list, tuple)):
            # A list of features.
            super(FeatureCollection, self).__init__(
                apifunction.ApiFunction.lookup('Collection'),
                {'features': [feature.Feature(i) for i in args]})
        elif isinstance(args, ee_list.List):
            # A computed list of features.
            super(FeatureCollection,
                  self).__init__(apifunction.ApiFunction.lookup('Collection'),
                                 {'features': args})
        elif isinstance(args, computedobject.ComputedObject):
            # A custom object to reinterpret as a FeatureCollection.
            super(FeatureCollection, self).__init__(args.func, args.args,
                                                    args.varName)
        else:
            raise ee_exception.EEException(
                'Unrecognized argument type to convert to a FeatureCollection: %s'
                % args)
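The docstring above enumerates six constructor forms. A minimal sketch of the three most common paths, assuming this is the Earth Engine Python client (where the class surfaces as ee.FeatureCollection) and that the client has already been authenticated:

import ee

ee.Initialize()  # assumes prior ee.Authenticate()

# 1) A string: the ID of a stored table/collection.
states = ee.FeatureCollection('TIGER/2018/States')

# 3) A geometry: wrapped into a single-feature collection.
point_fc = ee.FeatureCollection(ee.Geometry.Point([-122.08, 37.42]))

# 5) An array of features: built client-side, then wrapped as one Collection.
pair_fc = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point([0, 0]), {'label': 'origin'}),
    ee.Feature(ee.Geometry.Point([1, 1]), {'label': 'unit'}),
])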
Example #3
 def plr_feature(name):
     fst_name = f"fst_{name}"
     snd_name = f"snd_{name}"
     if fst_name in dct and snd_name in dct:
         fst_val = dct[fst_name]
         snd_val = dct[snd_name]
         self.features.append(
             feature.Feature(name=fst_name, value=fst_val, flip_value=snd_val)
         )
         self.features.append(
             feature.Feature(name=snd_name, value=snd_val, flip_value=fst_val)
         )
Example #4
 def get_feature(self, sep, algorithm):
     # feature selection
     feature_inst = feature.Feature()
     feature_inst.data2vector(data = self.data_inst, sep = sep)
     feature_inst.select_feat(feat_num = self.feat_num, algorithm = algorithm)
     self.feat_names = feature_inst.select_feat_name
     self.feat_inst = feature_inst
Example #5
def main(argv):
    del argv

    o_data_source = tushare_data.DataSource(20000101, '', '', 1, 20120101, 20200106, False, False, True)
    o_feature = feature.Feature(30, feature.FUT_D5_NORM, 1, False, False)
    # o_feature = feature.Feature(30, feature.FUT_5REGION5_NORM, 5, False, False)
    # o_feature = feature.Feature(30, feature.FUT_2AVG5_NORM, 5, False, False)
    o_wave = ExtremeWave(o_data_source, o_feature, 2, 2, False, 0, 0.1, 5)
    split_date = 20180101
    o_dl_model = dl_model.DLModel('%s_%u' % (o_wave.setting_name, split_date), 
                                  o_feature.feature_unit_num, 
                                  o_feature.feature_unit_size,
                                  32, 10240, 0.004, 'mean_absolute_tp0_max_ratio_error')
    if FLAGS.mode == 'data':
        o_data_source.DownloadData()
        o_data_source.UpdatePPData()
    elif FLAGS.mode == 'testall':
        o_wave.TradeTestAll()
    elif FLAGS.mode == 'test':
        o_wave.TradeTestStock(FLAGS.c, FLAGS.show)
    elif FLAGS.mode == 'show':
        o_wave.ShowTradePP(FLAGS.c)
    elif FLAGS.mode == 'train':
        tf, tl, vf, vl, td = o_wave.GetDataset(split_date)
        tl = tl * 100.0
        vl = vl * 100.0
        o_dl_model.Train(tf, tl, vf, vl, FLAGS.epoch)
    elif FLAGS.mode == 'rtest':
        tf, tl, tf, tl, ta = o_wave.GetDataset(split_date)
        o_dl_model.LoadModel(FLAGS.epoch)
        o_wave.RTest(o_dl_model, tf, ta, False)
        
    exit()
Example #6
def lr_main(train, test, outfilepath):
    '''
    Linear Regression main function
    
    train: filepath of train.csv
    test: filepath of test_X.csv
    outfilepath: filepath of predicted_result.csv
    '''
    # Load in data & preprocessing
    training_data = pd.read_csv(train, sep=",", encoding="big5") 

    # Training
    W_best = []
    for i in range(30):
        train = feature.Feature(training_data)
        train, d = train.scaling()
        train.add_bias()
        train, _ = train.bagging(train)
        W = train_lr(train, iternum=100000, lamb=1e-2)
        W_best.append(W)

    with open("./model/W_best.pkl", "wb") as o:
        pickle.dump(W_best, o)
    with open("./model/W_best.csv", "w") as o:
        for W in W_best:
            for line in W:
                o.write(str(line))
                o.write("\n")
    
    # Testing and output result
    test = pd.read_csv(test, sep=",", header=None)
    test_lr(W_best, test, d, outfilepath, train)
Example #7
 def build_aprx(self):
     self.features = []
     for feature_n in xrange(len(self.X[0])):
         data = []
         for i in xrange(len(self.X)):
             data.append([self.X[i][feature_n], self.Y[i]])
         ftr = feature.Feature()
         ftr.create_vars(data)
         ftr.solve_theta()
         self.features.append(ftr)
Example #8
	def binary_feature(self, sentence, type_feature):
		self.sentence = sentence

		# temporary (uncleaned) arrays for the training data and labels
		train = []

		# for IIS training, extract binary features together with their labels,
		# e.g. (dict(f1=0, f2=0, f3=0, f4=0, f5=0, f6=0, f7=1, f8=1, f9=0, f12=0, f10=0, f11=0), "NUM")
		if type_feature == "train_iis":
			for index, data in enumerate(sentence): 
				label = []
				token = word_tokenize(data)
				for index, data in enumerate(token):
					if "/" in data :
						#add label to array
						label.append(self.lbl.search(token[index]).group(1))
						#add word to array
						token[index] = self.w.search(token[index]).group(1)
					else:
						label.append("O")
				for index, data in enumerate(token):
					# feature processing: call the Feature class
					featuretrain = f.Feature()
					result = featuretrain.template_feature(token, label, index)
					#result = template_feature(token, label, index)
					train.append(result) 
		else:
			# for NER training (or anything other than IIS), only extract binary features, without labels,
			# e.g. (dict(f1=0, f2=0, f3=0, f4=0, f5=0, f6=0, f7=1, f8=1, f9=0, f12=0, f10=0, f11=0),)
			token = word_tokenize(sentence)
			label = []
			for index, data in enumerate(token):
				# feature processing: call the Feature class
				featuretrain = f.Feature()
				result = featuretrain.template_feature(token, label, index)
				#result = template_feature(token, label, index)
				train.append(result) 

		# filter out empty/None entries, since "Other"/"O" entities are not processed
		train_set = filter(None, train)
		#print train_set
		return train_set
Example #9
 def window_func(win):
     col_time = 'time'
     col_vals = list(win.columns)
     col_vals.remove('time')
     col_vals.remove('event_seizure')
     col_event = 'event_seizure'
     time = win[col_time]
     vals = win[col_vals]
     event = win[col_event]
     ## TODO: data stacking breaks if extra info data is added
     func = feature.Feature(time=time, vals=vals, event=event).func()
     return func
Example #10
def add_feature_dif_bonus(features,
                          feature_name,
                          sex,
                          pid1,
                          pid2,
                          min_date=None,
                          max_date=None):
    value = get_dif_bonus(sex,
                          pid1,
                          pid2,
                          min_date=min_date,
                          max_date=max_date)
    features.append(
        feature.Feature(name=feature_name, value=value, flip_value=-value))
Example #11
    def get_feature(self, fname):
        feature = self.features.get(fname)
        if feature is None:
            feature_info = self.content['features'].get(fname)
            if feature_info is None:
                return None
            import feature as F
            feature = F.Feature(fname,
                                feature_info.get('name', None),
                                self.version,
                                updatesite=self.site_root,
                                **feature_info)
            self.features[fname] = feature

        return feature
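get_feature above lazily builds each Feature on first request and then serves it from the self.features cache; the deferred `import feature as F` avoids colliding with the local variable named feature. The same cache-on-first-use pattern in isolation (all names below are illustrative stand-ins):

class Registry:
    def __init__(self):
        self._cache = {}
        self._specs = {'fname': {'name': 'Display Name'}}  # hypothetical metadata

    def get(self, key):
        obj = self._cache.get(key)
        if obj is None:
            spec = self._specs.get(key)
            if spec is None:
                return None          # unknown key: nothing to build
            obj = dict(spec)         # stand-in for F.Feature(key, **spec)
            self._cache[key] = obj   # build once, reuse afterwards
        return obj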
Example #12
def find_features(where):
    import path, feature

    features = []
    where = path.path(where)
    for plat_dir in where.dirs():
        if plat_dir.name not in feature.compatible_platform_versions:
            continue

        for feat_dir in plat_dir.dirs():
            feat = feature.Feature(feat_dir.name,
                                   platform_version=plat_dir.name,
                                   dir=where)
            features.append(feat)

    return features
Example #13
def feature_generate(pair):
    ftr = feature.Feature()
    res = []

    # extract each sentence pair's features into a list
    for x, y in pair:
        fc = ftr.first_capital(x, y)
        cd = ftr.capital_diff(x, y)
        sld = ftr.sent_lenght_diff(x, y)
        cod = ftr.comma_diff(x, y)
        dd = ftr.dot_diff(x, y)
        it = ftr.italic_diff(x, y)
        cb = ftr.count_bigram(x, y)
        cp = ftr.count_postag(x, y)
        sim = ftr.similarity(x, y)

        res.append([fc, cd, sld, cod, dd, it, cb, cp, sim])

    return res
Example #14
def main(argv):
    del argv

    o_data_source = tushare_data.DataSource(20000101, '', '', 1, 20000101,
                                            20200403, False, False, True)
    o_feature = feature.Feature(30, feature.FUT_D5_NORM_PCT, 1, False, False)
    # o_feature = feature.Feature(30, feature.FUT_D5_NORM, 1, False, False)
    # o_feature = feature.Feature(30, feature.FUT_5REGION5_NORM, 5, False, False)
    # o_feature = feature.Feature(30, feature.FUT_2AVG5_NORM, 5, False, False)
    o_trade = Breakup(o_data_source, o_feature, PPI_close_100_avg, 10, 10, 3.0,
                      0.1)
    split_date = 20180101
    o_dl_model = dl_model.DLModel('%s_%u' % (o_trade.setting_name, split_date),
                                  o_feature.feature_unit_num,
                                  o_feature.feature_unit_size, 32, 10240,
                                  0.004, 'mean_absolute_tp0_max_ratio_error')
    if FLAGS.mode == 'data':
        o_data_source.DownloadData()
        o_data_source.UpdatePPData()
    elif FLAGS.mode == 'testall':
        o_trade.TradeTestAll(True, FLAGS.show)
    elif FLAGS.mode == 'test':
        o_data_source.DownloadStockData(FLAGS.c)
        o_data_source.UpdateStockPPData(FLAGS.c)
        start_time = time.time()
        # o_trade.TradeTestStock(FLAGS.c, FLAGS.show)
        o_trade.Test(FLAGS.c)
    elif FLAGS.mode == 'train':
        tf, tl, vf, vl, td = o_trade.GetDataset(split_date)
        tl = tl * 100.0
        vl = vl * 100.0
        o_dl_model.Train(tf, tl, vf, vl, FLAGS.epoch)
    elif FLAGS.mode == 'rtest':
        tf, tl, tf, tl, ta = o_trade.GetDataset(split_date)
        o_dl_model.LoadModel(FLAGS.epoch)
        o_trade.RTest(o_dl_model, tf, ta, False)
    elif FLAGS.mode == 'dsw':
        dataset = o_trade.ShowDSW3DDataset()
    elif FLAGS.mode == 'show':
        dataset = o_trade.ShowTradePP(FLAGS.c)

    exit()
Example #15
def lr_main(train, test, outfilepath):
    '''
    Linear Regression main function
    
    train: filepath of train.csv
    test: filepath of test_X.csv
    outfilepath: filepath of predicted_result.csv
    '''

    training_data = pd.read_csv(train, sep=",", encoding="big5")

    train = feature.Feature(training_data)
    W_best = train_lr(train, model=1)
    '''
    with open("./model/W.pkl", "wb") as o:
        pickle.dump(W_best, o)
    '''
    # Testing and output result
    test = pd.read_csv(test, sep=",", header=None)
    test_lr(W_best, test, outfilepath, train)
Example #16
def main():
    """
    Controls the flow
    """
    start = time.time()

    # load the datasets
    loader = load.DataLoader()
    # return the training and the testing datasets
    train, test, strat, streets = loader.load_data()

    featurer = feature.Feature(train, test, streets)
    train, test, targets_tr, features = featurer.format()

    runner = classifier.Modeler(train, test, targets_tr, strat, features)
    runner.run_predictions()
    runner.run_Kfold()

    end = time.time()
    print "Total Time: " + str(end - start)
Example #17
def add_features(features, date, h2hobj, time_discounts=(0.95, 0.90, 0.85, 0.80, 0.75)):
    assert h2hobj is not None
    assert date is not None
    items = [
        Item(
            is_left_win=h2hobj.fst_player == mch.first_player,
            date=mch.date if mch.date else tour.date,
        )
        for tour, mch, _ in h2hobj.tour_match_aset
    ]
    calc = Calc(date, items)
    for time_discount in time_discounts:
        feat_name = "h2h_{:.2f}".format(time_discount).replace(".", "")
        calc.time_discount = time_discount
        value = calc.direct()
        features.append(
            feature.Feature(
                name=feat_name,
                value=value,
                flip_value=None if value is None else 1.0 - value,
            )
        )
Example #18
def lr_main(train, test, outfilepath):
    '''
    Linear Regression main function
    
    train: filepath of train.csv
    test: filepath of test_X.csv
    outfilepath: filepath of predicted_result.csv
    '''

    training_data = pd.read_csv(train, sep=",", encoding="big5")

    train = feature.Feature(training_data)
    train, d = train.scaling()
    train.add_bias()

    with open("./model/W_sbl.pkl", "rb") as w:
        W_best = pickle.load(w)

    # Testing and output result
    test = pd.read_csv(test, sep=",", header=None)
    test_lr(W_best, test, d, outfilepath, train)
Example #19
def main(argv):
    del argv
    o_data_source = tushare_data.DataSource(20000101, '', '', 1, 20100101, 20200306, False, False, True)
    o_feature = feature.Feature(7, feature.FUT_D5_NORM, 1, False, False)
    o_vol_wave = VolWave(o_data_source, o_feature, 0.1)
    split_date = 20180101
    o_dl_model = dl_model.DLModel('%s_%u' % (o_vol_wave.setting_name, split_date),
                                  o_feature.feature_unit_num,
                                  o_feature.feature_unit_size,
                                  32, 10240, 0.004, 'mean_absolute_tp0_max_ratio_error')
    if FLAGS.mode == 'data':
        o_data_source.DownloadData()
        o_data_source.UpdatePPData()
    elif FLAGS.mode == 'testall':
        o_vol_wave.TradeTestAll(True, FLAGS.show)
    elif FLAGS.mode == 'test':
        o_data_source.DownloadStockData(FLAGS.c)
        o_data_source.UpdateStockPPData(FLAGS.c)
        start_time = time.time()
        o_vol_wave.TradeTestStock(FLAGS.c, FLAGS.show)
        print(time.time() - start_time)
    elif FLAGS.mode == 'train':
        tf, tl, vf, vl, td = o_vol_wave.GetDataset(split_date)
        tl = tl * 100.0
        vl = vl * 100.0
        o_dl_model.Train(tf, tl, vf, vl, FLAGS.epoch)
    elif FLAGS.mode == 'rtest':
        tf, tl, tf, tl, ta = o_vol_wave.GetDataset(split_date)
        o_dl_model.LoadModel(FLAGS.epoch)
        o_vol_wave.RTest(o_dl_model, tf, ta, False)
    elif FLAGS.mode == 'dsw':
        dataset = o_vol_wave.ShowDSW3DDataset()
    elif FLAGS.mode == 'show':
        dataset = o_vol_wave.ShowTradePP(FLAGS.c)

    exit()
Example #20
 savestring = bstring[8] + '/' + bstring[9]
 #        save_csv1=os.path.join(saving_path1,str(save_name)+'M.txt')
 save_image = os.path.join(save_map, str(save_name))
 ###   get the image and label
 data_source = (np.loadtxt(label_source_path, dtype=np.str,
                           delimiter=","))[1:, 1:].astype(np.float)
 img_source = cv2.imread(file_source_path, 1)
 img_target = cv2.imread(file_target_path, 1)
 ###   pre-register
 pre = register.Pre(img_source, img_target, fixedsize)
 M_warp = pre.pre_register()
 width2 = pre.width2
 height2 = pre.height2
 img_warp = cv2.warpAffine(img_source, M_warp, (width2, height2))
 ###   feature register
 feature1 = feature.Feature(img_warp, img_target, shrink_num2)
 M2, I, lenmatch, threshold, in_num, Ir = feature1.register()
 ###   get the shrunk images after the transform, and a blend of the two
 shrink1 = feature1.dimg1
 shrink2 = feature1.dimg2
 w2, h2 = feature1.width2, feature1.height2
 warp = cv2.warpAffine(shrink1, M2, (w2, h2))
 merge = np.uint8(shrink2 * 0.5 + warp * 0.5)
 cv2.imwrite(str(save_image) + '-mix.jpg', merge)
 plt.imshow(merge)
 plt.show()
 ###   get the transform of the total size
 M = M2
 M[0, 2] = 5 * M2[0, 2]
 M[1, 2] = 5 * M2[1, 2]
 M_warp = np.vstack((M_warp, [0, 0, 1]))
Example #21
def main(argv):
    del argv

    end_date = 20200306
    split_date = 20100101
    o_data_source = tushare_data.DataSource(20000101, '', '', 10, 20000101,
                                            end_date, False, False, True)
    o_feature = feature.Feature(10, feature.FUT_D5_NORM, 1, False, False)
    obj = OpenClose(o_data_source, o_feature, not FLAGS.overlap_feature)
    o_dl_model = dl_model.DLModel(
        '%s_%u' % (obj.setting_name, split_date),
        o_feature.feature_unit_num,
        o_feature.feature_unit_size,
        # 32, 10240, 0.04, 'mean_absolute_tp0_max_ratio_error') # rtest<0
        # 4, 10240, 0.04, 'mean_absolute_tp0_max_ratio_error') # rtest<0
        # 4, 10240, 0.01, 'mean_absolute_tp0_max_ratio_error') # rtest:0.14
        32,
        10240,
        0.03,
        'mean_absolute_tp_max_ratio_error_tanhmap',
        50)  # rtest:0.62
    # 16, 10240, 0.01, 'mean_absolute_tp0_max_ratio_error') # rtest<0
    # 16, 10240, 0.01, 'mean_absolute_tp_max_ratio_error_tanhmap', 100)
    if FLAGS.mode == 'datasource':
        o_data_source.DownloadData()
        o_data_source.UpdatePPData()
    elif FLAGS.mode == 'dataset':
        obj.CreateDataSet()
    elif FLAGS.mode == 'public_dataset':
        obj.CreateDataSet()
        public_dataset = obj.PublicDataset()
        file_name = './public/data/dataset.npy'
        np.save(file_name, public_dataset)
    elif FLAGS.mode == 'train':
        tf, tl, vf, vl, td = obj.GetDataset(split_date)
        # tf, tl, vf, vl, va = obj.GetDatasetRandom(0.5)
        train_epoch = FLAGS.epoch if FLAGS.epoch > 0 else 250
        o_dl_model.Train(tf, tl, vf, vl, train_epoch)
    elif FLAGS.mode == 'rtest':
        tf, tl, vf, vl, va = obj.GetDataset(split_date)
        # tf, tl, vf, vl, va = obj.GetDatasetRandom(0.5)
        o_dl_model.LoadModel(FLAGS.epoch)
        obj.RTest(o_dl_model, vf, va, False)
    # elif FLAGS.mode == 'dqntest':
    #     o_dl_model.LoadModel(FLAGS.epoch)
    #     o_dsfa = dsfa3d_dataset.DSFa3DDataset(o_data_source, o_feature)
    #     o_dqn_test = dqn_test.DQNTest(o_dsfa, split_date, o_dl_model)
    #     o_dqn_test.Test(1, FLAGS.pt, True, FLAGS.show)
    # elif FLAGS.mode == 'dqntestall':
    #     o_dl_model.LoadModel(FLAGS.epoch)
    #     o_dsfa = dsfa3d_dataset.DSFa3DDataset(o_data_source, o_feature)
    #     o_dqn_test = dqn_test.DQNTest(o_dsfa, split_date, o_dl_model)
    #     o_dqn_test.TestAllModels(1, FLAGS.pt)
    # elif FLAGS.mode == 'predict':
    #     o_dl_model.LoadModel(FLAGS.epoch)
    #     o_data_source.SetPPDataDailyUpdate(20180101, 20200323)
    #     o_dsfa = dsfa3d_dataset.DSFa3DDataset(o_data_source, o_feature)
    #     o_dqn_test = dqn_test.DQNTest(o_dsfa, split_date, o_dl_model)
    #     o_dqn_test.Test(1, FLAGS.pt, True, FLAGS.show)
    elif FLAGS.mode == 'dsw':
        dataset = obj.ShowDSW3DDataset()
    elif FLAGS.mode == 'show':
        dataset = obj.ShowTradePP(FLAGS.c)
    elif FLAGS.mode == 'showlabel':
        dataset = obj.ShowLabel()
    elif FLAGS.mode == 'debug':
        dataset = np.load(obj.FileNameDataset())
        print("dataset: {}".format(dataset.shape))
        dataset = np_common.Sort2D(dataset, [obj.index_increase], [False])
        dataset = dataset[:5]
        obj.ShowDataSet(dataset, 'dataset')
    elif FLAGS.mode == 'clean':
        obj.Clean()
        o_dl_model.Clean()
    elif FLAGS.mode == 'pp':
        o_data_source.ShowStockPPData(FLAGS.c, FLAGS.date)
    elif FLAGS.mode == 'vol':
        o_data_source.ShowAvgVol(100000)

    exit()
Example #22
def build_data(fname, user_cluster=None, pv_model=None, cv=10):
    """
    Loads and process data.
    """
    feature_extractor = feature.Feature()

    revs = []
    vocab = defaultdict(float)
    ins_idx, tw_w_clu = 0, 0
    users = set()

    if pv_model is not None:
        dim = pv_model.layer1_size
        keys = ["PV_%d" % i for i in range(dim)]
        if user_cluster is not None:
            key_clus = ["PV_clu_%d" % i for i in range(dim)]

    with open(fname, "rb") as f:
        for line in f:
            line = line.strip()
            spidx = line.rfind("|")
            rev = line[:spidx].strip()
            metas = line[spidx + 1:].strip().split(",")
            label = int(metas[0])
            cluster = 0
            if user_cluster is not None and metas[1] in user_cluster:
                cluster = user_cluster[metas[1]]
                users.add(metas[1])
                tw_w_clu += 1
            if len(metas) > 2:
                split = int(metas[2])
            else:
                #split = np.random.randint(0,cv)
                split = ins_idx % cv
            nrc_feat = feature_extractor.NRC_feature_extractor(line)
            orig_rev = clean_str(rev)
            words = set(orig_rev.split())
            for word in words:
                vocab[word] += 1
            if pv_model is not None:
                pv_dict = dict(zip(keys, pv_model['SENT_%d' % ins_idx]))
                if user_cluster is not None:
                    pv_dict.update(
                        dict(zip(key_clus, pv_model['CLUSTER_%d' % cluster])))
            else:
                pv_dict = {}
            datum = {
                "y": label,
                "text": orig_rev,
                "features": nrc_feat,
                "pv": pv_dict,
                "num_words": len(orig_rev.split()),
                "cluster": cluster,
                "split": split
            }
            revs.append(datum)
            ins_idx += 1
    max_l = np.max(pd.DataFrame(revs)["num_words"])

    logger.info(
        "finished building data: %d tweets, of which %d tweets from %d users have link information"
        % (ins_idx, tw_w_clu, len(users)))
    logger.info(
        "tweets of users with no links are associated with cluster ID 0 (the biggest cluster)"
    )
    logger.info("vocab size: %d, max tweet length: %d" % (len(vocab), max_l))
    return revs, vocab, max_l
Example #23
        img2_i = np.zeros(img2.shape, dtype=np.uint8)
        mask2 = mask2s[m][:] > 0
        for i in range(3):
            img1_i[:, :, i] = img1[:, :, i] * mask1  #+(1-mask1)*255
            img2_i[:, :, i] = img2[:, :, i] * mask2  #+(1-mask2)*255
        ###   pre-register the connected domains
        pre = register.Pre(img1_i, img2_i, 200)
        M_warp = pre.pre_register()
        width2 = pre.width2
        height2 = pre.height2
        img_warp_i = cv2.warpAffine(img1_i, M_warp, (width2, height2))
        plt.subplot(121), plt.imshow(img_warp_i)
        plt.subplot(122), plt.imshow(img2_i)
        plt.show()
        ###   get feature and register
        feature1 = feature.Feature(img_warp_i, img2_i, 1)
        ###   get M, the registered image, the match count, the threshold, the inlier count, and the match image
        M, I_, lenmatch, threshold, interiornum, Ir = feature1.register()
        match_path = os.path.join(result_path, 'match')
        match_filename = os.path.join(match_path,
                                      str(base) + str(m) + '-r.jpg')
        cv2.imwrite(match_filename, Ir)
        #                M=cv2.getRotationMatrix2D((0,0),0,1)
        img3_i = cv2.warpAffine(img_warp_i, M, (width2, height2))
        ###   get the NGF of image and image after register
        pre2 = register.Pre(img3_i, img2_i, 200)
        dst1, dst2 = pre2.crop_cross()
        distance, ngf = pre2.NGF(dst1, dst2)
        ###   get the total martix of pre-register and feature register
        M2_ = np.vstack((M_warp, [0, 0, 1]))
        M1_ = np.vstack((M, [0, 0, 1]))
Example #24
    title = file_pt.readline()
    output = open(data_io.get_paths()["submission_path"], "a")
    tot_fet = None
    for l in file_pt.readlines():
        res = l.split(",")
        fet = f.create_features_from_res(res)
        if tot_fet is None:
            tot_fet = fet
        else:
            tot_fet = numpy.vstack((tot_fet, fet))
        pred = classifier.predict_proba(fet[:, 3:])
        sorted_pred = sorted(zip(res[1].split(), pred[:, 1]),
                             key=lambda a: a[1],
                             reverse=True)
        #print sorted_pred
        output.write(res[0] + "," +
                     " ".join(map(lambda a: a[0], sorted_pred)) + "\n")
    output.close()
    numpy.savetxt(data_io.get_paths()["test_feature_path"],
                  tot_fet.astype(float),
                  fmt='%f',
                  delimiter=",")


if __name__ == "__main__":
    p = parser.Parser()
    p.parse_csv()
    f = feature.Feature(p)
    classifier = train(f, data_io.get_paths()["train_path"])
    predict(f, classifier, data_io.get_paths()["valid_path"])
Example #25
def main():
    ###   write the header row of the results file
    with open(result_filename, 'a', encoding='utf-8') as f1:
             f1.writelines('{0:<20}{1:<65}{2:<25}{3:<25}{4:<15}{5:<15}{6:<20}\n'.
                           format('dir ','name', 'rTRE(median_rTRE)','match_num','threshold','inliers','times'))
###   get the path of image/label file
    for dir1 in image_dir1:
        print(dir1)     
        image_path1=os.path.join(image_path, dir1)
        image_dir2 = sorted(os.listdir(image_path1))
        readlabel = os.path.join(label_path, dir1)
        m=os.listdir(readlabel)[0]
        for dir2 in image_dir2:
            saving_path = os.path.join(result_path, str(dir1),str(dir2))
            if not os.path.exists(saving_path):
                os.makedirs(saving_path)
            image_path2=os.path.join(image_path1, dir2)
            file_image=os.listdir(image_path2)
            for x in file_image:
                str_img=str(x)[-4:]
            file_target_path = os.path.join(image_path2,str(dir2)+str_img)
            file_target=os.path.basename(file_target_path)
            file = [x for x in file_image if x !=file_target]
            label_target_path=os.path.join(readlabel,m,str(file_target)[:-4]+'.csv')
            for idx in file:
                file_source_path=os.path.join(image_path2,str(idx))
                label_source_path=os.path.join(readlabel,m,str(os.path.basename(idx)[:-4])+'.csv')
###   record the start time of registration
                start_time1 = time.time()
###   get the image and the label
                img_source = cv2.imread(file_source_path,1)
                img_target = cv2.imread(file_target_path,1)
                data_source = (np.loadtxt(label_source_path, dtype=np.str, delimiter=","))[1:, 1:].astype(np.float)
                data_target = (np.loadtxt(label_target_path, dtype=np.str, delimiter=","))[1:, 1:].astype(np.float)
###   define path to save
                save_name = os.path.join(os.path.basename(idx)[:-4]+'to'+file_target[:-4])
                save_path = os.path.join(saving_path, save_name)
                error_image=os.path.join(error_path,str(save_name)+str(dir1)+'.jpg')
                save_data_path=os.path.join(saving_path,'datalabel')
                if not os.path.exists(save_data_path):
                    os.makedirs(save_data_path)
                print('load:',time.time()-start_time1)
###   pre-register
                pre=register.Pre(img_source,img_target,fixedsize)
                M_warp=pre.pre_register()
                width2=pre.width2
                height2=pre.height2
                img_warp=cv2.warpAffine(img_source, M_warp, (width2, height2))
                print('pre:',time.time()-start_time1)
###   feature register
                feature1=feature.Feature(img_warp,img_target,shrink_num)
                M,I,lenmatch,threshold,in_num,Ir=feature1.register()
                print('feature:',time.time()-start_time1)
###   get the shrunk images after the transform, and a blend of the two
                shrink1=feature1.dimg1
                shrink2=feature1.dimg2
                w2,h2=feature1.width2,feature1.height2
                warp = cv2.warpAffine(shrink1, M, (w2, h2))
                merge = np.uint8(shrink2 * 0.5 + warp * 0.5)
                plt.imshow(merge)
                plt.show()
###   save images
                cv2.imwrite(str(save_path)+'-merge.jpg',merge)
                cv2.imwrite(str(save_path)+'-warp.jpg', warp)
                cv2.imwrite(str(save_path)+'-match.jpg', Ir)
                cv2.imwrite(str(save_path)+'-register.jpg', I)
### get the transform of the total size
                M1=M
                M1[0,2]=5*M1[0,2]
                M1[1,2]=5*M1[1,2]
                M1_=np.vstack((M1,[0,0,1]))
                M2_=np.vstack((M_warp,[0,0,1]))
                M3=np.dot(M1_,M2_)
#                   np.savetxt(os.path.join(save_M ,str(idx)[:-4]+'.txt'),M1)
#                    data_warp=(f.get_data(data_source,M_warp)[1:,1:]).astype('float')
###   get the rTRE and the new data of landmarks,save data
                rTRE,_,_,data_save= c.get_TRE(data_source, data_target, M3,height2,width2)
                data_save1=c.get_data(data_source,M3)
                np.savetxt(os.path.join(save_data_path,str(idx)[:-4]+'.csv'),data_save,
                           fmt=('%s,%s,%s'),delimiter=',')
                np.savetxt(os.path.join(save_data_path,str(idx)[:-4]+'-1.csv'),data_save1,
                           fmt=('%s,%s,%s'),delimiter=',')
                print('save:',time.time()-start_time1)
                print(rTRE)
###   save the blended image if rTRE is greater than 0.02
                if rTRE>0.02:
                    cv2.imwrite(str(error_image), merge)
                del data_save,data_source,data_target,#inliers,nkp1_s,nkp1_t
                gc.collect()
                del img_source,img_target,I
                gc.collect()
# =============================================================================
#                     ###   draw the landmarks in the image
#                     for i in range(len(data_source)):
#                         x=data_source[i][0]
#                         y=data_source[i][1]
#                         k=np.array((x,y),np.int32).reshape((-1,1,2))
#                         cv2.polylines(img_target,k,True,(0,255,0),100)
#                     for i in range(len(data_target)):
#                         x=data_target[i][0]
#                         y=data_target[i][1]
#                         k=np.array((x,y),np.int32).reshape((-1,1,2))
#                         cv2.polylines(img_target,k,True,(255,0,0),100)
#                     for i in range(1,len(data_save)):
#                         x3=np.float32(data_save[i][1])
#                         y3=np.float32(data_save[i][2])
#                         k=np.array((x3,y3),np.int32).reshape((-1,1,2))
#                         cv2.polylines(img_target,k,True,(0,0,255),100)
# =============================================================================
                end_time1 = time.time()
                print(idx)
                print(save_name)
                time_register = end_time1 - start_time1
###   write the collected information
                with open(result_filename, 'a', encoding='utf-8') as f1:
                    f1.writelines('{0:<20}{1:<65s}{2:<25s}{3:<25s}{4:<15s}{5:<15s}{6:<20s}\n'
                                  .format(str(dir1),str(save_name),str(rTRE),str(lenmatch),
                                          str(threshold),str(in_num),str(time_register)))
#                with open(result_filename, 'a', encoding='utf-8') as f1:
#                    f1.writelines('{0:<20}{1:<65s}{2:<25s}{3:<15s}{4:<15s}{5:<15s}\n'
#                                  .format(str(dir1),str(save_name),str(rTRE),str(err)[:8],str(iteration),str(time_register)))
    end_time = time.time()
    sumtime = end_time - start_time
    with open(result_filename, 'a', encoding='utf-8') as f1:
        f1.writelines('{0:<20}{1:<20n}\n'.format('all of time:',sumtime))
    print(result_path)
Example #26
        feature_list = kwargs.get('feature_list', None)
        if not feature_list:
            self.name = self.name + '(-irt)'
        self.train_x = self.select_features(self.feature.features_train, feature_list)
        self.train_y = self.feature.label_train.values
        self.feature_names = self.train_x.columns
        # evaluate performance on the training set
        self.train_y_pred = self.predict(self.train_x)
        self.train_ev = self.evaluation.evaluate(y_true=self.train_y, y_pred=self.train_y_pred, threshold=0.5)

        return self

    def predict(self, x: pd.DataFrame = None) -> np.ndarray:
        if x is None:
            x = self.feature.features_test

        y_pred = self.predict_by_theta(x)
        return y_pred


if __name__ == "__main__":
    import feature

    ft = feature.Feature()
    ft.fit()
    # ft.select()

    model = Irt(ft)
    model.predict()
    model.evaluate()
Example #27
CAR_SIMPLE = "simple"
CAR_USER = "******"
CAR_NESTED = "nested"
CAR_BELIEF = "belief"
CAR_CANNED = "canned"
CAR_NEURAL = "neural"
CAR_COPY = "copy"

# Rewards are various custom reward functions
REWARD_MIDDLE_LANE = "r_middle_lane"
REWARD_LEFT_LANE = "r_left_lane"
REWARD_RIGHT_LANE = "r_right_lane"

REWARD_FUNCTIONS = {
    REWARD_MIDDLE_LANE:
    feature.Feature(lambda t, x, u: -(x[0])**2),
    REWARD_LEFT_LANE:
    feature.Feature(lambda t, x, u: np.exp((-0.5 * (x[0] + 0.13)**2) / .04)),
    REWARD_RIGHT_LANE:
    feature.Feature(lambda t, x, u: -(x[0] - 0.13)**2),
}
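# A hedged aside (not part of the original module): each entry above wraps a
# reward lambda taking (t, x, u), where x[0] is the lateral lane position and
# 0.13 the lane offset. For a car centered in the left lane (x[0] == -0.13),
# REWARD_LEFT_LANE's Gaussian exp(-0.5 * 0**2 / .04) peaks at 1.0, while
# REWARD_MIDDLE_LANE scores -(-0.13)**2 == -0.0169 and REWARD_RIGHT_LANE
# scores -(-0.13 - 0.13)**2 == -0.0676.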

# car_from {{{


# car_from constructs a car from a declarative car
# definition, e.g.:
#
#   def = {
#       "kind": CAR_SIMPLE,
#       "x0": [-.13, 0.0, math.pi/2., 0.5],
Example #28
                         (self.code_index_map[ts_code], ts_code))

    def CreateDSFa3DDataset(self):
        dataset_file_name = self.FileNameDSFa3DDataset()
        self.dataset = np.zeros(
            (len(self.date_list), len(self.code_list), self.feature.unit_size))
        # base_common.ListMultiThread(CreateDSFa3DSplitMTFunc, self, 1, self.code_list)
        for ts_code in self.code_list:
            self.CreateDSFa3DSplitStock(ts_code)
        base_common.MKFileDirs(dataset_file_name)
        np.save(dataset_file_name, self.dataset)
        self.dataset = None

    def GetDSFa3DDataset(self):
        dataset_file_name = self.FileNameDSFa3DDataset()
        if not os.path.exists(dataset_file_name):
            self.CreateDSFa3DDataset()
        return np.load(dataset_file_name)


if __name__ == "__main__":
    data_source = tushare_data.DataSource(20000101, '', '', 1, 20000101,
                                          20200106, False, False, True)
    data_source.ShowStockCodes()

    o_feature = feature.Feature(30, feature.FUT_D5_NORM, 1, False, False)

    o_dataset = DSFa3DDataset(data_source, o_feature)
    temp_dataset = o_dataset.GetDSFa3DDataset()
    print("dataset: {}".format(temp_dataset.shape))
Example #29
def main(argv):
    del argv
    code_filter = '\
000001.SZ,\
000002.SZ,\
000063.SZ,\
000538.SZ,\
000541.SZ,\
000550.SZ,\
000560.SZ,\
000561.SZ,\
000584.SZ,\
000625.SZ,\
000650.SZ,\
000651.SZ,\
000721.SZ,\
000800.SZ,\
000802.SZ,\
000858.SZ,\
000898.SZ,\
000913.SZ,\
000927.SZ,\
000932.SZ,\
000937.SZ,\
000938.SZ,\
000951.SZ,\
000959.SZ,\
001696.SZ,\
600000.SH,\
600006.SH,\
600085.SH,\
600104.SH,\
600109.SH,\
600115.SH,\
600137.SH,\
600177.SH,\
600198.SH,\
600199.SH,\
600600.SH,\
600601.SH,\
600609.SH,\
600612.SH,\
600623.SH,\
600624.SH,\
600664.SH,\
600679.SH,\
600702.SH,\
600718.SH,\
600809.SH'

    o_data_source = tushare_data.DataSource(20000101, '', code_filter, 1,
                                            20130101, 20200106, False, False,
                                            True)
    o_feature = feature.Feature(30, feature.FUT_D5_NORM, 1, False, False)
    # o_feature = feature.Feature(30, feature.FUT_5REGION5_NORM, 5, False, False)
    # o_feature = feature.Feature(30, feature.FUT_2AVG5_NORM, 5, False, False)
    o_avg_wave = AvgWave(o_data_source, o_feature, PPI_close_30_avg, MODE_GRAD,
                         0, 0, 0.1)
    split_date = 20180101
    o_dl_model = dl_model.DLModel(
        '%s_%u' % (o_avg_wave.setting_name, split_date),
        o_feature.feature_unit_num, o_feature.feature_unit_size, 32, 10240,
        0.004, 'mean_absolute_tp0_max_ratio_error')
    if FLAGS.mode == 'data':
        o_data_source.DownloadData()
        o_data_source.UpdatePPData()
    elif FLAGS.mode == 'testall':
        o_avg_wave.TradeTestAll(True, FLAGS.show)
    elif FLAGS.mode == 'test':
        o_data_source.DownloadStockData(FLAGS.c)
        o_data_source.UpdateStockPPData(FLAGS.c)
        start_time = time.time()
        o_avg_wave.TradeTestStock(FLAGS.c, FLAGS.show)
        print(time.time() - start_time)
    elif FLAGS.mode == 'train':
        tf, tl, vf, vl, td = o_avg_wave.GetDataset(split_date)
        tl = tl * 100.0
        vl = vl * 100.0
        o_dl_model.Train(tf, tl, vf, vl, FLAGS.epoch)
    elif FLAGS.mode == 'rtest':
        tf, tl, tf, tl, ta = o_avg_wave.GetDataset(split_date)
        o_dl_model.LoadModel(FLAGS.epoch)
        o_avg_wave.RTest(o_dl_model, tf, ta, False)
    elif FLAGS.mode == 'dsw':
        dataset = o_avg_wave.ShowDSW3DDataset()
    elif FLAGS.mode == 'show':
        dataset = o_avg_wave.ShowTradePP(FLAGS.c)

    exit()
Example #30
import selector
import feature

# TEST 1 - Paper
candidates = ["Gun", "Bell"]
s1 = selector.Selector(candidates)

f1 = feature.Feature("Applicator Cost", 5)
s1.add_ndn_measurable_feature(f1, feature.Mode.MINIMAZING, [120e3, 140e3])

f2 = feature.Feature("Safety Issues", 5)
s1.add_immeasurable_feature(f2, [4, 3])

s1.calc_premium_values()

# TEST 2 - Paper
candidates = ["Gun", "Bell"]
s2 = selector.Selector(candidates)

f1 = feature.Feature("Transfer Efficiency", 3)
s2.add_immeasurable_feature(f1, [2, 4])

f2 = feature.Feature("Part Ground Sensitivity", 2)
s2.add_immeasurable_feature(f2, [4, 2])

f3 = feature.Feature("Penetration Into Dificult Areas", 2)
s2.add_immeasurable_feature(f3, [4, 1])

f4 = feature.Feature("Ease of Path Teaching", 3)
s2.add_immeasurable_feature(f4, [2, 4])