if __name__ == '__main__':
    # Script entry point: read the optional cluster count from the command
    # line, load the two cached feature pickles and the integrated
    # expression table, then initialise the run settings (n_iter,
    # estimators, est_name).

    # Optional first CLI argument is the number of clusters; 0 means
    # "not given" and is resolved from the labels further down.
    n_cluster = 0
    if len(sys.argv) > 1:
        n_cluster = int(sys.argv[1])

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('loading data')

    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only load pickles that were produced by a trusted run.
    # presumably these hold deep-network and PCA features (names only;
    # confirm against the code that writes the pickles).
    with open('deep_feature.pickle', 'rb') as handle:
        NNdict = pickle.load(handle)
    with open('pca_feature.pickle', 'rb') as handle:
        PCAdict = pickle.load(handle)

    # Earlier revisions of this call passed whitening=True or
    # whitening=False instead of the two normalisation flags used here.
    (all_data, labeled_data, unlabeled_data, label_unique_list,
     all_label, labeled_label,
     all_weights, labeled_weights, unlabeled_weights,
     all_sample_ID, labeled_sample_ID, unlabeled_sample_ID,
     gene_names) = parse_data.load_integrated_data(
        'data/TPM_mouse_1_4_6_7_8_10_16.txt',
        sample_normalize=True,
        gene_normalize=True)

    Label = labeled_label

    # No explicit count on the command line: fall back to the largest
    # label value.
    # NOTE(review): this equals the number of classes only if labels are
    # 1-based and contiguous -- confirm against parse_data.
    if n_cluster == 0:
        n_cluster = max(Label)

    # Formatted into one string so the text matches the original
    # "print 'n_cluster=', n_cluster" output on both Python 2 and 3.
    print('n_cluster= %s' % (n_cluster,))

    n_iter = 10
    estimators = []
    est_name = []
''' #data_file_name='../data/TPM_mouse_1_4_6_7_8_10_16.txt' data_file_name = 'important_file/TPM_mouse_7_8_10_PPITF_gene_9437.txt' if args.data_file is not None: data_file_name = args.data_file #if 'boot' in args.neural_network: # append_ID='_'+args.neural_network.split('_')[-1].split('.')[0] #else: # append_ID='' if args.identification is not None: append_ID = '_' + args.identification else: append_ID = '' all_data, labeled_data, unlabeled_data, label_unique_list, all_label, labeled_label, all_weights, labeled_weights, unlabeled_weights, all_sample_ID, labeled_sample_ID, unlabeled_sample_ID, gene_names = parse_data.load_integrated_data( data_file_name, sample_normalize=args.sample_normalize, gene_normalize=args.gene_normalize, log_trans=args.log_trans, ref_gene_file=args.reference_gene_file) #'cluster_genes.txt' all_data_sn1_gn0, _, _, _, _, _, _, _, _, _, _, _, _ = parse_data.load_integrated_data( data_file_name, sample_normalize=1, gene_normalize=0, ref_gene_file=args.reference_gene_file) #'cluster_genes.txt' fit_data = all_data transform_data = all_data #print 'all_data.shape: ', all_data.shape #all_data=all_data[:100,:200] if args.fit == 'labeled':