# Fragment of a command-line merge script: registers the options -mapper_name,
# -o/--out, -save_name, -study_name, -cluster, -node and -split on `parser`,
# parses them at module level, prints the parsed namespace, and then (under the
# __main__ guard) starts setting up the merge:
#   * builds a Mapper from args.mapper_name, loads it from args.mapper and sets
#     its chunk_size to the -split value;
#   * creates a PyTables filter (zlib, complevel=9);
#   * creates one Reader('genotype') per entry of args.genotype and starts it
#     with hdf5=True, ID=False and the study name at the same position in
#     args.study_name;
#   * initialises the RSID and SUB_ID lists and opens a loop over the readers.
# NOTE(review): the statements below are collapsed onto one physical line and
#   the body of the final `for i in gen:` is not visible here; line breaks and
#   indentation must be restored before this can be parsed as Python.
# NOTE(review): `print args` is the Python 2 print statement; it is a syntax
#   error under Python 3.
# NOTE(review): args.mapper and args.genotype are read, but no add_argument
#   call for them is visible in this fragment - presumably they are registered
#   earlier; confirm.
# NOTE(review): args.study_name[i] is indexed by position in args.genotype, so
#   this presumably expects at least as many study names as genotype paths -
#   TODO confirm / validate.
parser.add_argument('-mapper_name', type=str, help='Mapper name') parser.add_argument("-o", "--out", type=str, required=True, help="path to save result folder") parser.add_argument("-save_name", type=str, required=True, help="merge study name") parser.add_argument('-study_name', type=str, required=True,nargs='+', help=' Name for saved genotype data, without ext') parser.add_argument('-cluster', type=str, default='n', choices=['y','n'], help=' Is it parallel cluster job, default no') parser.add_argument('-node', nargs='+',help='number of nodes / this node number, example: 10 2 ') parser.add_argument('-split',type=int,help='Split size for merge genotypes') args = parser.parse_args() print args if __name__ == '__main__': mapper=Mapper(args.mapper_name) mapper.load(args.mapper) mapper.chunk_size=args.split hdf5_iter=0 h5_name=args.save_name pytable_filter=tables.Filters(complevel=9, complib='zlib') gen=[] for i,j in enumerate(args.genotype): gen.append(Reader('genotype')) gen[i].start(j,hdf5=True, study_name=args.study_name[i], ID=False) RSID=[] SUB_ID=[] for i in gen:
# Fragment that begins inside a genotype-conversion branch and continues into
# the 'encoding' mode branch; the enclosing if/elif chain starts outside this
# view.
# Conversion tail: sets G.split_size to CONVERTER_SPLIT_SIZE and calls
#   G.VCF2hdf5(args.out); the else arm raises ValueError for any other input
#   format. Afterwards check_converter(args.out, args.study_name[0]) is called
#   and the elapsed time from t.secs is printed.
# Encoding branch (args.mode == 'encoding'): creates a Mapper, assigns
#   genotype_names, chunk_size (MAPPER_CHUNK_SIZE) and reference_name, then
#   calls load_flip and load with args.mapper; starts a 'phenotype' Reader on
#   args.phenotype[0] and a 'genotype' Reader on args.genotype[0] with
#   hdf5=args.hdf5, study_name=args.study_name[0], ID=False.
# NOTE(review): the statements are collapsed onto one physical line, so
#   everything after the first '#' of the ENCODING banner is a comment to the
#   tokenizer and the encoding branch is inert as written; line breaks and
#   indentation must be restored.
G.split_size = CONVERTER_SPLIT_SIZE G.VCF2hdf5(args.out) else: raise ValueError( 'Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder' ) check_converter(args.out, args.study_name[0]) print('Time to convert all data: {} sec'.format(t.secs)) ################################### ENCODING ############################## elif args.mode == 'encoding': #ARG_CHECKER.check(args,mode='encoding') mapper = Mapper() mapper.genotype_names = args.study_name mapper.chunk_size = MAPPER_CHUNK_SIZE mapper.reference_name = args.ref_name mapper.load_flip(args.mapper) mapper.load(args.mapper) phen = Reader('phenotype') phen.start(args.phenotype[0]) gen = Reader('genotype') gen.start(args.genotype[0], hdf5=args.hdf5, study_name=args.study_name[0], ID=False)
# Fragment that begins at the VCF arm of a format dispatch (the preceding
# if/elif arms are outside this view) and continues into the 'encoding' mode
# branch.
# VCF arm: wraps the reader R in GenotypeVCF for args.study_name[0], sets
#   G.cluster to True when -cluster is 'y', sets G.split_size to
#   CONVERTER_SPLIT_SIZE and calls G.VCF2hdf5(args.out); the else arm raises
#   ValueError for any other input format. The elapsed time from t.secs is
#   then printed.
# Encoding branch (args.mode == 'encoding'): creates a Mapper, assigns
#   genotype_names, chunk_size (MAPPER_CHUNK_SIZE) and reference_name, then
#   calls load_flip and load with args.mapper; starts a 'phenotype' Reader on
#   args.phenotype[0] and a 'genotype' Reader on args.genotype[0]; finally
#   creates Encoder(args.out) and sets its study_name to args.study_name[0].
# NOTE(review): the statements are collapsed onto one physical line, so
#   everything after the first '#' of the ENCODING banner is a comment to the
#   tokenizer and the encoding branch is inert as written; line breaks and
#   indentation must be restored.
elif R.format=='VCF': G = GenotypeVCF(args.study_name[0], reader=R) if args.cluster=='y': G.cluster=True G.split_size=CONVERTER_SPLIT_SIZE G.VCF2hdf5(args.out) else: raise ValueError('Genotype data should be in PLINK/MINIMAC/VCF format and alone in folder') print ('Time to convert all data: {} sec'.format(t.secs)) ################################### ENCODING ############################## elif args.mode=='encoding': #ARG_CHECKER.check(args,mode='encoding') mapper=Mapper() mapper.genotype_names=args.study_name mapper.chunk_size=MAPPER_CHUNK_SIZE mapper.reference_name=args.ref_name mapper.load_flip(args.mapper) mapper.load(args.mapper) phen=Reader('phenotype') phen.start(args.phenotype[0]) gen=Reader('genotype') gen.start(args.genotype[0], hdf5=args.hdf5, study_name=args.study_name[0], ID=False) e=Encoder(args.out) e.study_name=args.study_name[0]