# Load per-cell class labels and feature names, restrict the dataset to a
# chosen set of classes, and balance the class sample sizes.
# NOTE(review): `data`, `data_dir`, `cell`, `wid`, and the helper module `cl`
# are defined earlier in the file (outside this fragment) — confirm.
filename = data_dir + cell + "_" + str(wid) + "bp_Classes.txt"
classes = numpy.loadtxt(filename, delimiter='\t', dtype=object)
filename = data_dir + cell + "_Features.txt"
features = numpy.loadtxt(filename, delimiter='\t', dtype=object)

# Classes to keep; the commented alternatives are subsets used in other runs.
given = ["A-E", "I-E", "A-P", "I-P", "A-X", "I-X", "UK"]
#given=["A-E","I-E"]
#given=["A-P","I-P"]
#given=["A-E","A-P"]
#given=["A-E","A-X"]
#given=["A-P","A-X"]
#given=["A-E","A-P","A-X"]
#given=["A-E","I-E","A-P","I-P"]
#given=["A-E","I-E","A-P","I-P","A-X","I-X"]
#given=["I-E","I-P"]
data, classes, _ = cl.take_some_classes(data, classes, given=given, others=None)

# balance the sample sizes of the classes
rng = numpy.random.RandomState(1000)  # fixed seed for reproducibility
data, classes, others = cl.balance_sample_size(data, classes, others=None,
                                               min_size_given=None, rng=rng)

# Parenthesized form prints identically under Python 2 for a single argument.
print(data.shape)
print(numpy.unique(classes))

# Grouping of the original labels into super-classes; commented lines are
# alternative groupings kept for reference.
#group=[["A-E"],["I-E"],["A-P"],["I-P"],["A-X"],["I-X"],["UK"]]
#group=[["A-E","A-P"],["I-E","I-P","A-X","I-X","UK"]]
#group=[["A-E","A-P","A-X"],["I-E","I-P","I-X","UK"]]
group = [["A-E"], ["A-P"], ["I-E", "I-P", "A-X", "I-X", "UK"]]
#group=[["A-E"],["A-P"],["A-X"],["I-E","I-P","I-X","UK"]]
#group=[["A-E"],["I-E"]]
#group=[["A-P"],["I-P"]]
# NOTE(review): this fragment is whitespace-mangled — many statements are
# collapsed onto one physical line, it opens with the tail of a docstring
# whose start is outside this view, and the closing call to
# convolutional_mlp.train_model(...) is cut off mid-argument-list, so the
# line is not valid Python as written. Left byte-identical; reflow against
# the original script before running.
# From what is visible it: loads Data/Classes/Features from fixed result
# paths, keeps only the "Enhancer"/"EnhancerFalse" classes, maps them to
# labels 0/1, splits 1:1:1 into train/valid/test, frees `data`, seeds a
# RandomState(1000), silences numpy warnings, then starts training a
# convolutional MLP — TODO confirm against the full script.
# `gc_collect()` looks like a typo for `gc.collect()` — verify.
# `numpy.warnings` is a deprecated alias for the stdlib `warnings` module
# in older numpy and removed in modern numpy — presumably intentional here;
# confirm the numpy version in use.
Each row is a string (white space not allowed) as the feature name of the corresponding column in [1]. """ ################################## #load your data here ... ################################## filename="/home/yifengli/research/dnashape/result/Data_1000bp.txt"; data=numpy.loadtxt(filename,delimiter='\t',dtype='float16') filename="/home/yifengli/research/dnashape/result/Classes_1000bp.txt"; classes=numpy.loadtxt(filename,delimiter='\t',dtype=str) filename="/home/yifengli/research/dnashape/result/Features.txt"; features=numpy.loadtxt(filename,delimiter='\t',dtype=str) # change class labels given=["Enhancer","EnhancerFalse"] data,classes=cl.take_some_classes(data,classes,given) given={"Enhancer":0,"EnhancerFalse":1} classes=cl.change_class_labels_to_given(classes,given) train_set_x_org,train_set_y_org,valid_set_x_org,valid_set_y_org,test_set_x_org,test_set_y_org \
=cl.partition_train_valid_test(data,classes,ratio=(1,1,1)) del data gc_collect() rng=numpy.random.RandomState(1000) numpy.warnings.filterwarnings('ignore') # train classifier,training_time=convolutional_mlp.train_model( train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org, valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org, n_row_each_sample=4,
# Load class labels and feature names for the current cell, restrict to a
# chosen set of classes, and balance the class sample sizes.
# NOTE(review): `filename` carries over from a statement above this fragment,
# and `data`, `data_dir`, `cell`, and the helper module `cl` are defined
# earlier in the file (outside this view) — confirm.
classes = numpy.loadtxt(filename, delimiter='\t', dtype=object)
filename = data_dir + cell + "_Features.txt"
features = numpy.loadtxt(filename, delimiter='\t', dtype=object)

# Classes to keep; the commented alternatives are subsets used in other runs.
given = ["A-E", "I-E", "A-P", "I-P", "A-X", "I-X", "UK"]
#given=["A-E","I-E"]
#given=["A-P","I-P"]
#given=["A-E","A-P"]
#given=["A-E","A-X"]
#given=["A-P","A-X"]
#given=["A-E","A-P","A-X"]
#given=["A-E","I-E","A-P","I-P"]
#given=["A-E","I-E","A-P","I-P","A-X","I-X"]
#given=["I-E","I-P"]
data, classes, _ = cl.take_some_classes(data, classes, given=given, others=None)

# balance the sample sizes of the classes
rng = numpy.random.RandomState(1000)  # fixed seed for reproducibility
data, classes, others = cl.balance_sample_size(data, classes, others=None,
                                               min_size_given=None, rng=rng)

# Parenthesized form prints identically under Python 2 for a single argument.
print(data.shape)
print(numpy.unique(classes))

# Alternative label groupings kept for reference.
#group=[["A-E"],["I-E"],["A-P"],["I-P"],["A-X"],["I-X"],["UK"]]
#group=[["A-E","A-P"],["I-E","I-P","A-X","I-X","UK"]]
# NOTE(review): this fragment is whitespace-mangled — many statements sit on
# one physical line, it opens with the tail of a docstring whose start is
# outside this view, and the final call to convolutional_mlp.train_model(...)
# is cut off mid-argument-list, so the line is not valid Python as written.
# Left byte-identical; reflow against the original script before running.
# From what is visible it: loads Data/Classes/Features from fixed result
# paths, keeps only the "Enhancer"/"EnhancerFalse" classes, maps them to
# labels 0/1, splits 1:1:1 into train/valid/test, frees `data`, seeds a
# RandomState(1000), silences numpy warnings, then starts training a
# convolutional MLP — TODO confirm against the full script.
# `gc_collect()` looks like a typo for `gc.collect()` — verify.
Each row is a string (white space not allowed) as the feature name of the corresponding column in [1]. """ ################################## #load your data here ... ################################## filename = "/home/yifengli/research/dnashape/result/Data_1000bp.txt" data = numpy.loadtxt(filename, delimiter='\t', dtype='float16') filename = "/home/yifengli/research/dnashape/result/Classes_1000bp.txt" classes = numpy.loadtxt(filename, delimiter='\t', dtype=str) filename = "/home/yifengli/research/dnashape/result/Features.txt" features = numpy.loadtxt(filename, delimiter='\t', dtype=str) # change class labels given = ["Enhancer", "EnhancerFalse"] data, classes = cl.take_some_classes(data, classes, given) given = {"Enhancer": 0, "EnhancerFalse": 1} classes = cl.change_class_labels_to_given(classes, given) train_set_x_org,train_set_y_org,valid_set_x_org,valid_set_y_org,test_set_x_org,test_set_y_org \
=cl.partition_train_valid_test(data,classes,ratio=(1,1,1)) del data gc_collect() rng = numpy.random.RandomState(1000) numpy.warnings.filterwarnings('ignore') # train classifier, training_time = convolutional_mlp.train_model( train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org,