Example #1
0
def print_transformations(trainX_, trainY_):
    """Print shapes of one sample before/after augmentation and save plots.

    Picks the sample at index 2 from each input, runs the pair through
    ``processData`` with the crop/transpose/flip commands, prints the shapes
    before and after, and writes one ``x<i>.png`` / ``y<i>.png`` pair into
    ``./plot/`` for every entry along the first axis of the processed input.

    Args:
        trainX_: indexable as ``[None, i, :, :, :]`` (assumes a numpy-style
            array with at least four axes -- TODO confirm).
        trainY_: same indexing requirements as ``trainX_``.
    """
    print("before crop")

    sample_idx = 2
    # Leading ``None`` re-adds a length-1 first axis after selecting the
    # sample (numpy-style indexing assumed).
    x_sample = trainX_[None, sample_idx, :, :, :]
    y_sample = trainY_[None, sample_idx, :, :, :]
    for arr in (x_sample, y_sample):
        print(arr.shape)

    x_sample, y_sample = processData(
        [x_sample, y_sample],
        commands=["crop", "transpose", "flip_x", "flip_y"],
    )

    print("after trans")
    for arr in (x_sample, y_sample):
        print(arr.shape)

    n_variants = x_sample.shape[0]
    for i in range(n_variants):
        suffix = str(i) + '.png'

        plot_threed(x_sample[i], 'input', threshold=0.3, plot_num=4)
        plt.savefig('./plot/x' + suffix, bbox_inches='tight')

        plot_fourd(y_sample[i], 'truth', plot_num=5)
        plt.savefig('./plot/y' + suffix, bbox_inches='tight')

    # NOTE(review): ``clf_descr`` and ``pred`` are never assigned in this
    # function; unless they exist as module-level globals this line raises
    # NameError. Looks like a leftover from another function -- confirm.
    return clf_descr, pred

if __name__ == "__main__":

    # Result of loadVectors(); nothing in this visible section reads it.
    model = loadVectors()

    # define the categories
    categories = ['stats', 'math', 'physics', 'cs']

    print("Processing data...")
    abstractsTrain, y_train, abstractsTest = processData()

    # Optionally keep only the leading fraction of the training set.
    if opts.test_fraction:
        percent = opts.test_fraction * 100.0
        print("Using only %.f percent of the training data" % percent)

        threshold = int(opts.test_fraction * len(abstractsTrain))
        if not threshold:
            # The fraction truncated to zero documents: nothing to train on.
            print("Fraction too small, please choose a larger fraction")
            print()
            sys.exit(1)

        # Truncate documents and labels together so they stay aligned.
        abstractsTrain, y_train = (abstractsTrain[:threshold],
                                   y_train[:threshold])

    print("Train set size: %d documents" % len(abstractsTrain))
    print("Test set size: %d documents" % len(abstractsTest))
    print("done")
    print()
Example #3
0
if __name__ == '__main__':

    # Keyword arguments forwarded verbatim to loadData().
    data_params = dict(
        # When True, parse time domain raw data again, use when data changes
        reload=False,
        # maximum number of items in a scan
        max_items_per_scan=2,
        # size of training data
        train_test_split=0.7,
        only_max=False,
        saved_path="../new_res/*.json",
        # set to false to use clean signal instead of backproj
        use_backproj=True,
    )
    # reload_data()
    trainX_, testX_, trainY_, testY_ = loadData(**data_params)

    # The training pair gets the full command list; the test pair is only
    # cropped.
    trainX, trainY = processData(
        [trainX_, trainY_],
        commands=["crop", "transpose", "flip_x", "flip_y"],
    )
    testX, testY = processData(
        [testX_, testY_],
        commands=["crop"],
    )

    # Disabled alternative: crop-only for both splits.
    # trainX, trainY = processData([trainX_, trainY_],commands = ["crop"])
    # testX, testY = processData([testX_, testY_],commands = ["crop"])

    # Shuffle inputs and targets with one shared index permutation so the
    # pairs stay aligned; the fixed seed makes the order repeatable.
    N = len(trainX)
    idx = np.arange(N)
    np.random.seed(5)
    np.random.shuffle(idx)
    trainX = trainX[idx]
    trainY = trainY[idx]

    # Disabled experiments kept for reference:
    # combinedX = np.concatenate((trainX,testX),axis = 0)
    # combinedY = np.concatenate((trainY,testY),axis = 0)# (34, 40, 20, 21, 5)
    # combinedY = np.reshape(combinedY,(combinedY.shape[0],-1))
    # trainY_flat= np.reshape(trainY,(trainY.shape[0],-1))
def load_data(filename, features):
    """Return the result of ``preprocess.processData(filename, features)``.

    Thin wrapper: both arguments are passed through unchanged.

    Args:
        filename: forwarded as the first positional argument.
        features: forwarded as the second positional argument.

    Returns:
        Whatever ``preprocess.processData`` returns for these arguments.
    """
    # The statements that used to follow this return (timing print and a
    # ``clf_descr, pred`` return) could never execute and referenced names
    # not bound in this function, so they were removed.
    return preprocess.processData(filename, features)


if __name__ == "__main__":

    # Result of loadVectors(); nothing in this visible section reads it.
    model = loadVectors()

    # define the categories
    categories = [
        'stats',
        'math',
        'physics',
        'cs',
    ]

    print("Processing data...")
    abstractsTrain, y_train, abstractsTest = processData()

    # A truthy opts.test_fraction limits training to the first part of the
    # data; a falsy value leaves the training set untouched.
    if opts.test_fraction:
        percent = opts.test_fraction * 100.0
        print("Using only %.f percent of the training data" % percent)

        threshold = int(opts.test_fraction * len(abstractsTrain))
        if not threshold:
            # Zero documents would remain after truncation, so abort.
            print("Fraction too small, please choose a larger fraction")
            print()
            sys.exit(1)

        # Slice documents and labels with the same bound to keep them paired.
        abstractsTrain = abstractsTrain[:threshold]
        y_train = y_train[:threshold]

    print("Train set size: %d documents" % len(abstractsTrain))
    print("Test set size: %d documents" % len(abstractsTest))
    print("done")
    print()
Example #6
0
def main(_):
    print('preprocessing data ...')
    processData(FLAGS.year, FLAGS.domain, FLAGS.embedding)
    print('preparing data for model ...')
    trainData, testData, validData, sampleData = prepareData()