def oversample(args):
    print("+++ Reading input", file=sys.stderr)
    hashes, samples = read_input()
    print("+++ Converting to numpy array", file=sys.stderr)
    np_samples = np.array(samples, np.int32)
    print("+++ Performing SMOTE", file=sys.stderr)
    new_samples = smote.SMOTE(np_samples, args.smote_amount, args.neighbors)
    print("+++ Outputting new files", file=sys.stderr)
    output_new_files(hashes, new_samples, args)
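# For context, a minimal sketch of how oversample() might be driven from the
# command line. The flag names --smote-amount and --neighbors are assumptions,
# not the original project's CLI; read_input() and output_new_files() are
# assumed to come from the surrounding module.
import argparse
import sys

def main():
    parser = argparse.ArgumentParser(description="SMOTE-oversample input samples")
    parser.add_argument("--smote-amount", dest="smote_amount", type=int, default=100,
                        help="percentage of synthetic samples to generate")
    parser.add_argument("--neighbors", type=int, default=5,
                        help="k nearest neighbors used by SMOTE")
    oversample(parser.parse_args())

if __name__ == "__main__":
    main()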
def csv2py(self, filename, _smote=False, duplicate=False):
    "Convert a csv file to a model file"
    tbl = table(filename)
    if _smote:
        tbl = smote.SMOTE(tbl, atleast=50, atmost=101, bugIndx=1,
                          resample=duplicate)
    self.str2num(tbl)
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x

    # Work around a bug in table.py that fails to separate dependent and
    # independent variables: drop every independent column whose name also
    # appears among the dependent columns. (The original popped from
    # tbl.indep while enumerating it, which can skip entries.)
    depen_names = {l.name for l in tbl.depen}
    tbl.indep = [k for k in tbl.indep if k.name not in depen_names]

    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=[[tonum(xx) for xx in x.cells] for x in tbl._rows])
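# Why the rewrite above avoids pop-while-enumerate: removing an element shifts
# the rest of the list left, so enumerate() silently skips the next item. A
# self-contained illustration (the names here are illustrative only):
cols = ["a", "dup", "dup", "b"]
for indx, name in enumerate(cols):
    if name == "dup":
        cols.pop(indx)            # skips the second "dup"
print(cols)                       # ['a', 'dup', 'b'] -- one duplicate survives

cols = ["a", "dup", "dup", "b"]
cols = [c for c in cols if c != "dup"]
print(cols)                       # ['a', 'b'] -- the filter removes both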
negTestLines = open("mohsen/80-20-hsa1/20_expression_neg.csv", 'r').readlines()[1:]

posTraining = np.array([[float(y) for y in x.split(',')[:-1]] for x in posTrainLines])
negTraining = np.array([[float(y) for y in x.split(',')[:-1]] for x in negTrainLines])
posTest = np.array([[float(y) for y in x.split(',')[:-1]] for x in posTestLines])
negTest = np.array([[float(y) for y in x.split(',')[:-1]] for x in negTestLines])

pList = []

# Use SMOTE to deal with class imbalance; the amount is a percentage of
# synthetic minority samples, so integer-divide to keep it a whole multiple
# of 100 (plain / would yield a float under Python 3)
posTraining = smote.SMOTE(posTraining, 100 * (len(negTraining) // len(posTraining)), 5)

# Build a single negative+positive training set
trainingArray = np.concatenate((posTraining, negTraining))
trainingClasses = np.array(['1'] * len(posTraining) + ['0'] * len(negTraining))

# Build a single negative+positive test set
testArray = np.concatenate((posTest, negTest))
testClasses = np.array(['1'] * len(posTest) + ['0'] * len(negTest))

# Build classifier on training data
rf = RandomForestClassifier(n_estimators=500)
rf.fit(trainingArray, trainingClasses)
# joblib.dump(rf, "smirpdeep_model.pkl")

# Get results for test set
predictions = rf.predict_proba(testArray)
predictions = np.hstack((predictions, np.atleast_2d(testClasses).T))
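# A possible follow-up, not part of the original script: one way to score the
# probabilistic predictions above. roc_auc_score is standard sklearn API, and
# column 1 of predict_proba corresponds to the positive class because
# rf.classes_ sorts the string labels as ['0', '1'].
from sklearn.metrics import roc_auc_score

posProbs = rf.predict_proba(testArray)[:, 1]
print("test AUC:", roc_auc_score(testClasses.astype(int), posProbs))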
# class distribution: {0.0: 10, 1.0: 417, 2.0: 7, 3.0: 9, 4.0: 106}
'''
Formula for the number of synthetic samples:
    no_of_samples_generated = x * no_of_minority_samples / 100
You provide x; x must be < 100 or a multiple of 100.
'''
print("smoting time for level 0")
# get the records for class 0
classVal = float(0)
records_per_class_0 = smote_utility.getRecordsPeClass(classVal, the_data_set)
#print records_
array_shaped_record = np.array(records_per_class_0)
print("original dataset", array_shaped_record.shape)

count_extra_synthetic_samples = 3200
# fix the sample count based on the number of neighbors
nearest_neighbors = 10  # expected n_neighbors <= n_samples; level 0 has 10 samples
smoted_dataset_0 = smote.SMOTE(array_shaped_record.shape, array_shaped_record,
                               count_extra_synthetic_samples, nearest_neighbors)
print("smoted dataset shape: level-0::", smoted_dataset_0.shape)
print("-----")

print("smoting time for level 1")
# get the records for class 1
classVal = float(1)
records_per_class_1 = smote_utility.getRecordsPeClass(classVal, the_data_set)
#print records_
array_shaped_record = np.array(records_per_class_1)
print("original dataset", array_shaped_record.shape)

count_extra_synthetic_samples = 50
nearest_neighbors = 10  # expected n_neighbors <= n_samples; level 1 has 417 samples, going with 10
smoted_dataset_1 = smote.SMOTE(array_shaped_record.shape, array_shaped_record,
                               count_extra_synthetic_samples, nearest_neighbors)
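# The custom smote.SMOTE used above isn't shown, so as a point of reference
# here is a minimal sketch of the classic SMOTE algorithm (Chawla et al.,
# 2002) with the same x-percent convention: for x >= 100, each minority
# sample spawns x/100 synthetic neighbors; for x < 100, a random x% subset is
# SMOTEd once each. The function name and parameters are illustrative only.
import numpy as np
from sklearn.neighbors import NearestNeighbors

def smote_sketch(samples, x_percent, k, rng=None):
    rng = rng or np.random.default_rng(0)
    samples = np.asarray(samples, dtype=float)
    if x_percent < 100:
        # SMOTE only a random x% subset, one synthetic sample each
        keep = rng.choice(len(samples), int(len(samples) * x_percent / 100),
                          replace=False)
        samples, per_sample = samples[keep], 1
    else:
        per_sample = x_percent // 100
    nn = NearestNeighbors(n_neighbors=k + 1).fit(samples)
    _, idx = nn.kneighbors(samples)           # idx[:, 0] is the sample itself
    synthetic = []
    for i, row in enumerate(samples):
        for _ in range(per_sample):
            neighbor = samples[rng.choice(idx[i, 1:])]
            gap = rng.random()                # interpolate a random fraction
            synthetic.append(row + gap * (neighbor - row))
    return np.array(synthetic)

# Worked check against the class distribution above: level 0 has 10 samples,
# so x = 3200 yields 32 * 10 = 320 synthetics; level 1 has 417 samples, so
# x = 50 SMOTEs a random 50% subset (208 samples), yielding 208 synthetics.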