return args # MAIN if __name__ == "__main__": args = interface() input_dir = os.path.abspath(args.IN) if not input_dir.endswith('/'): input_dir += '/' output_dir = os.path.abspath(args.OUT) if not output_dir.endswith('/'): output_dir += '/' thresh = args.threshold hashobject = StreamingEigenhashes(input_dir, output_dir, get_pool=-1) Kmer_Hash_Count_Files = glob.glob(os.path.join(hashobject.input_path, '*.count.hash.conditioned')) hashobject.path_dict = {} for i in range(len(Kmer_Hash_Count_Files)): hashobject.path_dict[i] = Kmer_Hash_Count_Files[i] lsi = models.LsiModel.load(hashobject.output_path + 'kmer_lsi.gensim') hashobject.cluster_thresh = thresh Index = hashobject.lsi_cluster_index(lsi) np.save(hashobject.output_path + 'cluster_index.npy', Index) print('Cluster index has shape: ' + str(Index.shape)) with open(hashobject.output_path + 'numClusters.txt', 'w') as f: f.write('{0}\n'.format(Index.shape[0]))
sys.exit(2)  # NOTE(review): closes an error branch that begins above this chunk; re-indent to match it

# Translate the parsed command-line options into script settings.
# NOTE(review): the nesting of everything below (e.g. inside an
# ``if __name__ == "__main__":`` guard) is not visible from this chunk.
for opt, arg in opts:
    if opt in ("-h", "--help"):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(help_message)
        sys.exit()
    elif opt in ("-i", "--inputdir"):
        inputdir = arg
        # Paths below are built by string concatenation, so the directory
        # must end with a slash.
        if inputdir[-1] != "/":
            inputdir += "/"
    elif opt in ("-o", "--outputdir"):
        outputdir = arg
        if outputdir[-1] != "/":
            outputdir += "/"
    elif opt in ("-r", "--filerank"):
        # The rank is given 1-based on the command line; store it 0-based.
        fr = int(arg) - 1
    elif opt in ("-t", "--thresh"):
        thresh = float(arg)

hashobject = StreamingEigenhashes(inputdir, outputdir, get_pool=-1)

# Map a running integer id to each conditioned k-mer hash count file,
# in whatever order glob returns them.
Kmer_Hash_Count_Files = glob.glob(
    os.path.join(hashobject.input_path, "*.count.hash.conditioned")
)
hashobject.path_dict = dict(enumerate(Kmer_Hash_Count_Files))

lsi = models.LsiModel.load(hashobject.output_path + "kmer_lsi.gensim")
hashobject.cluster_thresh = thresh
Index = hashobject.lsi_cluster_index(lsi)

np.save(hashobject.output_path + "cluster_index.npy", Index)
# Formatting into one string keeps the printed text the same as the old
# ``print "...", Index.shape`` statement while also parsing on Python 3.
print("cluster index has shape: %s" % (Index.shape,))

# The context manager closes the file even if the write fails.
with open(hashobject.output_path + "numClusters.txt", "w") as f:
    f.write("%d\n" % Index.shape[0])