Code example #1
File: MultiFeatures.py Project: shl202/CSE252C
 def __init__(self, features):
     Features.__init__(self)
     self.features = features
     d = 0
     for i in range(len(features)):
         d += features[i].GetCount()
         self.SetCount(d)
Code example #2
 def __init__(self, conf):
     Features.__init__(self)
     nc = 0
     for i in range(kNumLevels):
         nc += (i + 1)**2
     self.SetCount(kNumBins * nc)
     print "Histogram bins:", self.GetCount()
Code example #3
    def generateFeatures(self):

        featureGenerator = Features()
        self.featuresFunctionsHandMade = featureGenerator.features()
        self.featuresFunctions = Features.featuresFromSentences(self.sentences)
        self.weigths = np.random.uniform(low=0.0,
                                         high=1.0,
                                         size=(len(
                                             self.featuresFunctionsHandMade)))
Code example #4
def extract_and_transform(avm, df, transform_y):
    f = Features()
    return f.extract_and_transform_X_y(
        df,
        f.ege(avm.features_group),
        layout_transactions.price,
        'natural',
        'natural',
        transform_y,
    )
Code example #6
def getFeatureFunctions(args):
    """
    Based on the user's choice, pass the appropriate functions.
    For more details, look into the Features class.
    :param args:
    :return:
    """
    functionsArray = Features.getSupportedFunctions()
    functionsList = sub1FromList(args.features)
    featureFunctions = functionsArray[functionsList]
    return Features(featureFunctions)
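In the snippet above, functionsArray[functionsList] suggests functionsArray is a NumPy array indexed with a Python list (fancy indexing), and sub1FromList presumably shifts the user's 1-based choices to 0-based indices. A self-contained sketch of that selection idea, with placeholder names that are not from the source project:

import numpy as np

def sub1FromList(xs):
    # shift 1-based user indices to 0-based array indices
    return [x - 1 for x in xs]

supported = np.array(['hog', 'raw_pixels', 'haar'])  # placeholder feature names
chosen = supported[sub1FromList([1, 3])]             # fancy indexing -> ['hog', 'haar']
print(chosen)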
Code example #7
 def read_and_save_features(path_to_audio_dataset, path_to_save_features):
     Preprocesor.create_structure(path=path_to_save_features)
     labels = np.sort(os.listdir(path_to_audio_dataset))
     for dir_filename in labels:
         path_to_word_dataset = path_to_audio_dataset + os.sep + dir_filename
         if not os.path.isdir(path_to_word_dataset):
             continue
         features_pack = []
         for file in tqdm(os.listdir(path_to_word_dataset)):
             input, rate = sf.read(file=path_to_word_dataset + os.sep +
                                   file)
             features = Features(input=input, rate=rate)
             features_to_save = features.wav_to_features()
             features_pack.append(features_to_save)
         np.save(file=path_to_save_features + os.sep + dir_filename,
                 arr=features_pack)
Code example #8
 def __init__(self,
              graph,
              settings):
     """
     :param graph (matplotlib.pyplot.scatter): graph instance that
         needs to be filled while exploring the neighbourhood
     :param settings (Settings): settings of the graph to be plotted and
         bees algorithm itself
     Settings that are used here:
         determining elite and nonelite sites:
             - BEESNUM
             - ELITE
             - NONELITE
             - RECRUITEDELITE
             - RECRUITEDNONELITE
         plotting the points on the graph:
             - getbest()
             - getlocalbest()
             - SIZELOCALBEST
             - OPACITYLOCALBEST
             - getrgbcolor()
     """
     self.graph = graph
     self.settings = settings
     self.globalbest = Point(Coordinate(float('inf'), float('inf'), float('inf')),
                             Features(None, None, None))
     self.wasglobalbest = False
     self.fabric = CoordinateFabric(settings)
     self.sites = []
     self.pointscontroller = PointsController()
Code example #9
 def __init__(self, row, store_score=False):
     self.essay_id = row[0]
     self.essay_set = int(row[1])
     text = row[2]
     if store_score:
         self.score = self.get_score(self.essay_set, row)
     self.features = Features(text)
Code example #10
def load_features(img_dirs, feature_names):
    """
    loads features from file
    :param img_dirs: a list of image paths
    :type img_dirs: list
    :param feature_names: a list of feature names
    :type feature_names: list
    :return: dict (img_dir, features) features is also a dict (feature_name, feature vector)
    :rtype: dict
    """

    features = OrderedDict()

    for img_dir in img_dirs:
        features[img_dir] = OrderedDict()
        for feature_name in feature_names:
            if Features.is_TU_feature(feature_name):
                feature = _load_TU_feature(img_dir, feature_name)

                if feature is None:
                    #replace with zeros of correct vector size
                    feature = np.zeros(
                        features[img_dirs[0]][feature_name].shape)

            else:
                feature = load_precalc_feature(img_dir, feature_name)

            features[img_dir][feature_name] = feature
    return features
Code example #11
def save_features(img_dir, feature_name, feature):
    """
    saves features of image in .txt file (compressed in gzip format)
    :param img_dir: path of the image
    :type img_dir: str
    :param feature_name: name of the feature (should be one of Features enum)
    :type feature_name: str
    :param feature: the feature vector
    :type feature: np.array
    :return:
    :rtype:
    """

    #create feature file path
    feature_file_path = os.path.join(
        img_dir[:img_dir.find('videos')], 'features', 'Features_From_TUWien',
        'Image_Subtask', feature_name,
        img_dir[img_dir.find('videos') + 7:] + '.txt.gz')

    if platform.system() == 'Linux':
        dirs = feature_file_path[:feature_file_path.rfind('/')]  #Linux system
    else:
        dirs = feature_file_path[:feature_file_path.rfind(
            '\\')]  #windows system

    if not os.path.exists(dirs):
        os.makedirs(dirs)

    if Features.is_single_val_feature(feature_name):
        #feature is a single value
        with gzip.open(feature_file_path, "w") as f:
            f.write(str(feature))
    else:
        np.savetxt(feature_file_path, feature, newline=' ')
Code example #12
 def add(self, point):
     # ADDS POINT TO THE HOLDER AND UPDATES OTHER POINTS IF NECESSARY
     rgba = self.decideoncolor(point.z)
     if (point.z < self.currentbestz):
         if (self.currentbestid != -1):
             self.holder.changesize(self.currentbestid,
                                    self.settings.SIZEBAD)
             self.holder.changeopacity(self.currentbestid,
                                       self.settings.OPACITYBAD)
         rgba.append(self.settings.OPACITYGOOD)
         self.currentbestz = point.z
         self.currentbestid = point.id
         features = Features(self.settings.SIZEGOOD, rgba)
     else:
         rgba.append(self.settings.OPACITYBAD)
         features = Features(self.settings.SIZEBAD, rgba)
     self.holder.add(ColoredPoint(point, features))
Code example #13
    def loadFeatures(self):
        np.set_printoptions(threshold=np.nan)

        features = loadmat(datadir + 'features/cache.binAudLSTM_' + self.type +
                           '_scene' + str(self.sceneid) + '/' + self.name)
        features = np.array(features["x"])

        self.fClass = Features(features)
Code example #14
def parseInputFile(inputFileName):
    featureObjects = []
    with open(inputFileName, 'r') as inputFile:
        csvFile = csv.reader(inputFile)
        for line in csvFile:
            feature = Features(line[0], line[1])
            featureObjects.append(feature)

    return featureObjects
Code example #15
File: chart07.py Project: seyi/re-avm
 def make_details(data, test_months, n_best, n_worst):
     'return a ColumnTable'
     extra_info = []
     feature_names = Features().ege_names(control.arg.features)
     columns_table = ColumnsTable((
         ('test_month', 6, '%6s', ('test', 'month'), 'test month'),
         ('nth', 2, '%2d', (' ', 'n'), 'rank of feature (1 ==> more frequently included)'),
         ('probability', 4, '%4.1f', (' ', 'prob'), 'probability feature appears in a decision tree'),
         ('feature_name', 40, '%40s', (' ', 'feature name'), 'name of feature'),
         ),
         verbose=True)
     for test_month in test_months:
         value = data[ReductionKey(test_month)]
         if 'feature_importances' not in value.importances:
             # one month has an ensemble model
             # skip that month
             print 'chart a sees an unexpected ensemble model'
             print 'test_month', test_month
             print 'value', value
             print 'value.importance', value.importances
             print 'skipping the test month'
             print 'entering debugger'
             pdb.set_trace()
         importances = value.importances['feature_importances']
         assert value.importances['features_group'] == control.arg.features, value
         model = value.model
         assert type(model) == ResultKeyGbr or type(model) == ResultKeyRfr
         sorted_indices = importances.argsort()  # sorted first lowest, last highest
         for nth_best in xrange(n_best):
             if nth_best == len(feature_names):
                 break
             index = sorted_indices[len(importances) - nth_best - 1]
             columns_table.append_detail(
                 test_month=test_month,
                 nth=nth_best + 1,
                 probability=importances[index] * 100.0,
                 feature_name=feature_names[index]
                 )
             extra_info.append([test_month, nth_best+1, importances[index]*100.0, feature_names[index]])
         for nth in xrange(n_worst):
             break  # skip, for now
             if nth == len(feature_names):
                 break
             nth_worst = n_worst - nth - 1
             index = sorted_indices[nth_worst]
             columns_table.append_detail(
                 test_month=test_month,
                 nth=len(importances) - nth_worst,
                 probability=importances[index] * 100.0,
                 feature_name=feature_names[index]
                 )
         if n_best > 1 or n_worst > 1:
             # insert blank line between test_months if more than 1 row in a month
             columns_table.append_detail()
     columns_table.append_legend()
     return columns_table, extra_info
Code example #16
 def gettor(cls, coordinate):
     """
     :param coordinate (Coordinate): coordinate of the point in Cartesian space
     :return (Point): TOR point that corresponds to coordinate provided
         (see more in settings documentation)
     """
     color = cls.FILLCOLORTOR
     features = Features(cls.SIZETOR, color, cls.BORDERCOLORTOR)
     res = Point(coordinate, features)
     return res
Code example #17
File: main.py Project: Arulselvanmadhavan/ConvNets
def getFeatureFunctions(args):
    """
    Based on the user's choice, pass the appropriate functions.
    For more details, look into the Features class.
    :param args:
    :return:
    """
    functionsArray = Features.getSupportedFunctions()
    functionsList = sub1FromList(args.features)
    featureFunctions = functionsArray[functionsList]
    return Features(featureFunctions)
Code example #18
def classify(args):
    out = pickle.load(open(args.m, 'rb'))
    params = out[0]
    dict_rev = out[1]
    if args.m == "odia" or "odia.torch":
        mydata = Features(350, 'unk-odia.vec', "fasttext.wiki.300d.vec",
                          args.i)
    else:
        mydata = Features(350, 'unk.vec', "glove.6B.50d.txt", args.i)

    model = NNComp(20, 0.01, 32, 40, 15000, 4)
    out, _ = model.forward(mydata.final_data, params)

    labels = np.argmax(out, axis=1)

    preds = np.array([dict_rev.get(str(k)) for k in labels])

    with open(args.o, "w") as file:
        for pred in preds:
            file.write(pred + "\n")
Code example #19
 def getbad(cls, coordinate):
     """
     :param coordinate (Coordinate): coordinate of the point in Cartesian space
     :return (Point): LOCALBEST point that corresponds to coordinate provided
         (see more in settings documentation)
     """
     color = cls.getrgbcolor(coordinate.z)
     color.append(cls.OPACITYBAD)
     features = Features(cls.SIZEBAD, color, cls.BORDERCOLORBAD)
     res = Point(coordinate, features)
     return res
Code example #20
File: predict.py Project: seyi/re-avm
def do_work(control):
    'write predictions to output csv file'
    samples = pd.read_csv(
        control.path_in_samples,
        nrows=10 if control.arg.test else None,
        usecols=None,  # TODO: change to columns we actually use
        low_memory=False,
    )
    apns = samples[layout_transactions.apn]
    sale_dates = samples[layout_transactions.sale_date]
    print 'read %d rows of samples from file %s' % (len(samples),
                                                    control.path_in_samples)

    # iterate over the fitted models
    hps_predictions = {}
    for root, dirnames, filenames in os.walk(control.path_in_fitted):
        assert len(dirnames) == 0, dirnames
        print root, len(filenames)
        for filename in filenames:
            suffix_we_process = '.pickle'
            if not filename.endswith(suffix_we_process):
                print 'skipping file without a fitted model: %s' % filename
                continue
            hps_string = filename[:-len(suffix_we_process)]
            hps = HPs.from_str(hps_string)
            path_to_file = os.path.join(root, filename)
            with open(path_to_file, 'r') as f:
                ok, fitted_model = pickle.load(f)
            if ok:
                print 'predicting samples using fitted model %s' % filename
                X, y = Features().extract_and_transform(
                    samples, hps['units_X'], hps['units_y'])
                predictions = fitted_model.predict(X)
                assert len(predictions) == len(samples)
                assert hps_string not in hps_predictions
                hps_predictions[hps_string] = predictions
            else:
                print 'could not predict samples using fitted model %s; reason: %s' % (
                    filename,
                    fitted_model,  # an error message
                )
        # have all the predictions for all filenames (= a set of hyperparameters)
        print 'walked all %d files' % len(filenames)
    out = {
        'apns': apns,
        'sale_dates': sale_dates,
        'hps_predictions': hps_predictions,
    }
    with open(control.path_out_file, 'w') as f:
        pickle.dump(out, f)
    print 'wrote results to %s' % control.path_out_file
    return
Code example #21
File: train.py Project: Amirhesam-ghml/Sample_Codes
def train(args):
    if args.i == "datasets/odia.train.txt":
        mydata    = Features(args.f,'unk-odia.vec',args.E,args.i,"Train")
        dim = 300
    else:
        mydata    = Features(args.f,'unk.vec',args.E,args.i,"Train")
        dim = 50
    model = NNComp(args.u,args.l,args.b,args.e,args.f*dim,len(mydata.labelname) )

    param,Train_cost,Test_cost = model.run(mydata.final_data,mydata.lables_number)
    
    plt.plot(Train_cost,'b',Test_cost,'r--')
    plt.ylabel('Loss')
    plt.xlabel('epochs')
#     plt.xticks([0,5,10,15,20,25])
    plt.suptitle('Train/valid loss')
    red_patch = mpatches.Patch(color='red', label='Validation')
    blue_patch = mpatches.Patch(color='blue', label='Train')
    plt.legend(handles=[red_patch,blue_patch])
    
    plt.show()   
    
    
    out = (param,mydata.labeldict_rev)
Code example #22
def classify(args):
    out = pickle.load(open(args.m, 'rb'))
    dict_rev = out[5]
    model = out[0]
    if args.m == ("odia" or "odia.torch"):
        mydata    = Features(out[2],'unk-odia.vec',"fasttext.wiki.300d.vec",args.i)     
        
    else:
        mydata    = Features(out[2],'unk.vec',"glove.6B.50d.txt",args.i)
        
    
    model.eval()
    
    
    out= model.forward(torch.from_numpy(mydata.final_data).float())
    
    labels = np.argmax(out.detach().numpy(),axis=1) 
    
    preds = np.array([ dict_rev.get(k) for k in labels])
    

    with open(args.o, "w") as file:
        for pred in preds:
            file.write(pred+"\n")
Code example #23
File: chart07.py Project: seyi/re-avm
 def make_mean_importance_by_feature(test_months):
     'return dict[feature_name] = float, the mean importance of the feature'
     feature_names = Features().ege_names(control.arg.features)
     mean_importance = {}  # key = feature_name
     for feature_index, feature_name in enumerate(feature_names):
         # build vector of feature_importances for feature_name
         feature_importances = np.zeros(len(test_months))  # for feature_name
         for month_index, test_month in enumerate(test_months):
             month_importances = data[ReductionKey(test_month)]  # for each feature
             if 'feature_importances' not in month_importances.importances:
                 print 'chart b sees an unexpected ensemble model'
                 print 'test_month', test_month
                 print 'month_importances', month_importances
                 print 'entering debugger'
                 pdb.set_trace()
             all_feature_importances = month_importances.importances['feature_importances']
             feature_importances[month_index] = all_feature_importances[feature_index]
         mean_importance[feature_name] = np.mean(feature_importances)
     return mean_importance
Code example #24
    def resultOfFeaturesInWeigths(self, sentence):

        arranges = list(
            itertools.permutations(self.possibleLabels,
                                   len(sentence.words) + 1))

        probabilityOfArrange = []

        for (indexTop, arrange) in enumerate(arranges):
            sumOfFeatures = 0

            for (index, feature) in enumerate(self.featuresFunctions):
                for position in range(0, len(sentence.labels)):
                    current = arrange[position]
                    previous = arrange[position - 1]
                    response = Features.verify(feature, current, previous)
                    sumOfFeatures += self.weigths[index] * response
            probabilityOfArrange.append(sumOfFeatures)
        bestIndices = np.argpartition(probabilityOfArrange, -1)[-1:]

        return bestIndices[0]
Code example #25
def scale_features_old(features):
    """
    scales all feature vectors in features
    :param features:
    :type features: dict
    :return: scaled features
    :rtype: dict
    """

    scaled_features = OrderedDict()
    for img_dir in features:
        scaled_features[img_dir] = dict()
        for feature_name in features[img_dir]:
            if Features.is_single_val_feature(feature_name):
                scaled_features[img_dir][feature_name] = features[img_dir][
                    feature_name]
            else:
                scaled_features[img_dir][feature_name] = preprocessing.scale(
                    features[img_dir][feature_name])
                #scaled_features[img_dir][feature_name] = preprocessing.minmax_scale(features[img_dir][feature_name])

    return scaled_features
Code example #26
def gen_feature_matrices_per_feature(features):
    """
    generates feature matrix for each feature which can be used to train SVM
    :param features:
    :type features: dict
    :return: feature_matrices: (feature_name, matrix)
    :rtype: dict
    """
    feature_matrices = dict()

    for img_dir in features:
        for feature_name in features[img_dir]:
            if Features.is_single_val_feature(feature_name):
                vector = np.asscalar(features[img_dir][feature_name])
            else:
                vector = features[img_dir][feature_name].tolist()

            #add vector to matrix
            if feature_name not in feature_matrices:
                feature_matrices[feature_name] = list()

            feature_matrices[feature_name].append(vector)

    return feature_matrices
Code example #27
def gen_final_feature_matrix_old(features):
    """
    generates the final feature matrix which can be used to train SVM
    :param features:
    :type features: dict
    :return: final feature matrix
    :rtype: np.array
    """

    final_feature_mat = []

    for img_dir in features:
        final_feature_vec = []
        for feature_name in features[img_dir]:
            if Features.is_single_val_feature(feature_name):
                final_feature_vec.append(
                    np.asscalar(features[img_dir][feature_name]))
            else:
                final_feature_vec.extend(
                    features[img_dir][feature_name].tolist())

        final_feature_mat.append(final_feature_vec)

    return np.asarray(final_feature_mat)
Code example #28
File: PreProcessing.py Project: Zerkles/SRUDA
    def choose_feature(self,list_of_metods,X,Y,columns,iteration):
        features=[]
        
        for metod in list_of_metods:
            print(metod)
            switcher={
                "sfm_lr": lambda : features.append(Features.select_features_select_from_model_LR(X, Y, columns, iteration).tolist()),
                "sfm_linearsvc":lambda:features.append(Features.select_features_select_from_model_linearsvc(X, Y, columns, iteration).tolist()),
                "sfm_rfc":lambda:features.append(Features.select_features_select_from_model_RandomForest(X, Y, columns, iteration).tolist()),
                "sfm_lasso":lambda:features.append(Features.select_features_select_from_model_lasso(X, Y, columns, iteration).tolist()),#last sfm

                "rle_lr":lambda:features.append(Features.select_features_RFE_LR(X, Y, columns, iteration).tolist()),
                "rle_linearsvc":lambda:features.append(Features.select_features_RFE_linearsvc(X, Y, columns, iteration).tolist()),
                "rle_rfc":lambda:features.append(Features.select_features_RFE_RandomForest(X, Y, columns, iteration).tolist()),
                "rle_lasso":lambda:features.append(Features.select_features_RFE_lasso(X, Y, columns, iteration).tolist()),#last rle

                "permutation_lr":lambda:features.append(Features.select_features_permutation_LR(X, Y, columns, iteration).tolist()),
                "permutation_linearsvc":lambda:features.append(Features.select_features_permutation_linearsvc(X, Y, columns, iteration).tolist()),
                "permutation_rfc":lambda:features.append(Features.select_features_permutation_RandomForest(X, Y, columns, iteration).tolist()),
                "permutation_lasso":lambda:features.append(Features.select_features_permutation_lasso(X, Y, columns, iteration).tolist())

             }.get(metod,lambda: None)()
        flatten = [val for sublist in features for val in sublist]#flatten list
        

        features=list(dict.fromkeys(flatten))# delete duplicates
        return features       
Code example #29
def do_work(control):
    'write fitted models to file system'
    def make_transaction_ids(df):
        'return dates and apns for the query samples'
        result = []
        for index, row in df.iterrows():
            next = TransactionId(
                sale_date=row[layout_transactions.sale_date],
                apn=row[layout_transactions.apn],
            )
            result.append(next)
        return result

    def read_csv(path):
        df = pd.read_csv(
            path,
            nrows=100 if control.arg.test else None,
            usecols=None,  # TODO: change to columns we actually use
            low_memory=False
        )
        print 'read %d samples from file %s' % (len(df), path)
        return df

    def in_prediction_month(query_samples, prediction_YYYYMM):
        'return DataFrame of samples in the month we are predicting'
        def splitYYYYMMDD(dates):
            year_factor = 10000.0
            years = (dates / year_factor).astype('int64')
            month_factor = 100.0
            months = ((dates - years * year_factor) / month_factor).astype('int64')
            return years, months

        def splitYYYYMM(date_str):
            date = int(date_str)
            year_factor = 100.0
            year = int(date / year_factor)
            month = int(date - year * year_factor)
            return year, month

        sale_dates = query_samples[layout_transactions.sale_date]
        query_years, query_months = splitYYYYMMDD(sale_dates)
        prediction_year, prediction_month = splitYYYYMM(prediction_YYYYMM)
        mask_year = query_years == prediction_year
        mask_month = query_months == prediction_month
        mask = mask_year & mask_month
        result = query_samples.loc[mask]
        return result

    # reduce process priority, to try to keep the system responsive
    lower_priority()

    with open(control.path_out_feature_names, 'w') as f:
        feature_names = Features().ege_names('swpn')
        pickle.dump(feature_names, f)

    training_samples = read_csv(control.path_in_training_samples)

    query_samples_all = read_csv(control.path_in_query_samples)
    query_samples = in_prediction_month(query_samples_all, control.arg.prediction_month)
    print 'read %s query samples of which %d are in the prediction month %s' % (
        len(query_samples_all),
        len(query_samples),
        control.arg.prediction_month,
    )
    with open(control.path_out_transaction_ids, 'w') as f:
        transaction_ids = make_transaction_ids(query_samples)
        pickle.dump(transaction_ids, f)
    with open(control.path_out_actuals, 'w') as f:
        X, actuals = Features().extract_and_transform(query_samples, 'natural', 'natural')
        pickle.dump(actuals, f)

    count_fitted = 0
    n_hps = make_n_hps(control.arg.model)

    # determine hps we have already fitted and predicted
    already_seen = set()
    if os.path.exists(control.path_out_predictions_attributes):
        with open(control.path_out_predictions_attributes, 'r') as f:
            unpickler = pickle.Unpickler(f)
            try:
                while True:
                    hps_str, predictions, fitted_attributes = unpickler.load()
                    print 'existing', hps_str
                    already_seen.add(hps_str)
            except EOFError as e:
                pass
    print 'have already seen %d hps_str values' % len(already_seen)

    # fit and predict HPs that we have not already seen
    with open(control.path_out_predictions_attributes, 'w') as results_file:
        pickler = pickle.Pickler(results_file)
        for hps in HPs.iter_hps_model(control.arg.model):
            count_fitted += 1
            start_time = time.clock()  # wall clock time on Windows, processor time on Unix
            hps_str = HPs.to_str(hps)
            if hps_str in already_seen:
                print 'skipping already seen: %s' % hps_str
                continue
            try:
                predictions, fitted_attributes, n_training_samples = fit_and_predict(
                    training_samples,
                    query_samples,
                    hps, control,
                )
                pickler.dump((hps_str, predictions, fitted_attributes))
                pickler.clear_memo()  # don't build up a large data structure
                print 'fit-predict #%4d/%4d on:%6d in: %6.2f %s %s %s %s hps: %s ' % (
                    count_fitted,
                    n_hps,
                    n_training_samples,
                    time.clock() - start_time,
                    control.arg.training_data,
                    control.arg.neighborhood,
                    control.arg.model,
                    control.arg.prediction_month,
                    hps_str,
                )
            except Exception as e:
                print 'exception: %s' % e
                pdb.set_trace()
                pickler.dump((hps_str, e))

            # collect to keep memory usage stable, so that we can run this program many times in parallel
            gc.collect()
            if control.arg.test and count_fitted == 5:
                print 'breaking because we are testing'
                break
Code example #30
 def X_y(df):
     return Features().extract_and_transform(df, hps['units_X'], hps['units_y'])
Code example #31
File: main.py Project: ziv0808/NLP_hw2
# if BASIC_MODEL = False then the complex model will be created
BASIC_MODEL = True
# if TRAIN_WITH_MST = False then training the perceptron uses the greedy method - faster
TRAIN_WITH_MST = True

max_accuracy = 0.83
if BASIC_MODEL:
    max_accuracy = 0.74
# load train file
train_words, train_pos, train_heads = read_file_and_preprocess('train.labeled', include_y=True)

# divide word lists into sentence lists
sent_word_list, sent_pos_list, sent_head_list = create_sentences_from_word_lists(train_words, train_pos, train_heads)
if BASIC_MODEL:
    # create features instance for basic model
    featurs_basic_obj = Features(train_words, train_pos, train_heads, features_to_include_list=[1,2,3,4,5,6,8,10,13])
else:
    # creates features for the complex model
    featurs_basic_obj = Features(train_words, train_pos, train_heads, features_to_include_list='ALL')

# init weight vector
basic_feature_weights_vec = np.zeros(featurs_basic_obj.feature_wieghts_len, dtype=np.float64)

# create each sentence's full graph and assign the relevant feature list to each edge
# also calculate the feature vector for each empirical observation - for optimization
sent_graph_list = []
sent_real_feat_idx = []
sent_graph_edges_feats = []
for m in range(len(sent_word_list)):
    # full graph
    sent_graph_list.append(build_sentence_full_graph(len(sent_word_list[m])))
Code example #32
import argparse

from Features import Features

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--db_path',
                        type=str,
                        default='images/',
                        help='Path of the image database')

    args = parser.parse_args()

    dbpath = args.db_path

    # Lists that store the extracted features and the baseline features
    feat = []
    base_feat = []

    features = Features()

    features.input_img(dbpath, feat, base_feat)

    features.compute_codebook(feat)

    features.compute_bow(feat)

    features.compute_tfidf()

    features.compute_baseline(base_feat)