예제 #1
0
import getopt
import glob
import os
import sys

import numpy as np

from streaming_eigenhashes import StreamingEigenhashes

help_message = "usage example: python kmer_corpus.py -i /project/home/hashed_reads/ -o /project/home/cluster_vectors/"


def parse_command_line(argv):
    """Parse the command-line options of this script.

    Args:
        argv: Argument strings without the program name (``sys.argv[1:]``).

    Returns:
        A tuple ``(inputdir, outputdir, file_rank)``. Both directory strings
        end with ``/`` and ``file_rank`` is the ``-r/--filerank`` value
        minus one (i.e. converted to a zero-based index).

    Exits:
        Prints ``help_message`` and exits with status 0 for ``-h/--help``.
        Prints ``help_message`` and exits with status 2 when an option is
        unknown, when ``-r`` is not an integer, or when any of ``-i``,
        ``-o`` or ``-r`` is missing or empty.
    """
    try:
        opts, _ = getopt.getopt(
            argv, "hi:o:r:", ["help", "inputdir=", "outputdir=", "filerank="])
    except getopt.GetoptError:
        print(help_message)
        sys.exit(2)

    inputdir = None
    outputdir = None
    file_rank = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(help_message)
            sys.exit()
        elif opt in ("-r", "--filerank"):
            try:
                file_rank = int(arg) - 1
            except ValueError:
                print(help_message)
                sys.exit(2)
        elif opt in ("-i", "--inputdir"):
            inputdir = arg
        elif opt in ("-o", "--outputdir"):
            outputdir = arg

    # All three options are needed below; fail with the usage text instead of
    # hitting an unbound variable later on.
    if not inputdir or not outputdir or file_rank is None:
        print(help_message)
        sys.exit(2)

    if not inputdir.endswith("/"):
        inputdir += "/"
    if not outputdir.endswith("/"):
        outputdir += "/"
    return inputdir, outputdir, file_rank


if __name__ == "__main__":
    inputdir, outputdir, fr = parse_command_line(sys.argv[1:])
    hashobject = StreamingEigenhashes(inputdir, outputdir, get_pool=False)
    # glob.glob() returns matches in arbitrary order; sort so that a given
    # rank selects the same file no matter which task evaluates it.
    Kmer_Hash_Count_Files = sorted(
        glob.glob(os.path.join(hashobject.input_path, "*.count.hash")))
    # Reject out-of-range ranks explicitly: a rank of 0 would otherwise become
    # index -1 and silently select the last file.
    if not 0 <= fr < len(Kmer_Hash_Count_Files):
        sys.exit("file rank %d is out of range: found %d '*.count.hash' file(s) in %s"
                 % (fr + 1, len(Kmer_Hash_Count_Files), hashobject.input_path))
    # Loading a column mask from disk is disabled; an empty mask is passed.
    # M = np.load(hashobject.input_path+'column_mask.npy')
    M = []
    hashobject.kmer_corpus_to_disk(Kmer_Hash_Count_Files[fr], mask=M)
예제 #2
0
                        dest='task_rank',
                        type=int,
                        metavar='<task_rank>',
                        help='The rank of the currant task.')

    args = parser.parse_args()
    return args


if __name__ == "__main__":
    # Script entry point: pick one '*.count.hash' file by task rank and hand
    # it to StreamingEigenhashes.kmer_corpus_to_disk.
    args = interface()

    # os.path.abspath drops a trailing slash, so it is re-appended here.
    input_dir = os.path.abspath(args.IN)
    if not input_dir.endswith('/'):
        input_dir += '/'

    output_dir = os.path.abspath(args.OUT)
    if not output_dir.endswith('/'):
        output_dir += '/'

    # The rank given on the command line is converted to a zero-based index.
    task_rank = args.task_rank - 1

    hashobject = StreamingEigenhashes(input_dir, output_dir, get_pool=False)
    # glob.glob() returns matches in arbitrary order; sort so that a given
    # rank selects the same file no matter which task evaluates it.
    Kmer_Hash_Count_Files = sorted(glob.glob(
        os.path.join(hashobject.input_path, '*.count.hash')))
    # Reject out-of-range ranks explicitly: a rank of 0 would otherwise become
    # index -1 and silently select the last file.
    if not 0 <= task_rank < len(Kmer_Hash_Count_Files):
        raise SystemExit(
            "[KmerCorpus] Task rank %d is out of range: found %d "
            "'*.count.hash' file(s) in %s"
            % (task_rank + 1, len(Kmer_Hash_Count_Files),
               hashobject.input_path))
    # Loading a column mask from disk is disabled; an empty mask is passed.
    # M = np.load(hashobject.input_path + 'column_mask.npy')
    M = []
    print("[KmerCorpus] Computing kmer corpus.")
    hashobject.kmer_corpus_to_disk(Kmer_Hash_Count_Files[task_rank], mask=M)
    print("[KmerCorpus] Done.")