parser.add_argument(
    "--ploidy",
    help="Ploidy for splitting phased sequences",
    action="store",
    type=int,
    nargs="+",
    # NOTE(review): with nargs="+" a user-supplied value arrives as a list of
    # ints, while this default is a bare int -- confirm the consumers of
    # args.ploidy accept both shapes.
    default=2,
)
parser.set_defaults(split=True)

args = parser.parse_args()

# Read the whole GFF into memory; gzip-compressed input is recognised by its
# ".gz" suffix and opened in text mode just like a plain file.
gffOpener = gzip.open if args.gff.endswith(".gz") else open
with gffOpener(args.gff, "rt") as gffHandle:
    gffLines = gffHandle.readlines()

sys.stderr.write("Parsing gene data from gff\n")

geneData = genomics.parseGenes(gffLines, targets=args.targets)

# Send output to the requested file (gzip-compressed when the name ends in
# ".gz"), falling back to stdout when no output file was given.
if args.outFile:
    outOpener = gzip.open if args.outFile.endswith(".gz") else open
    outFile = outOpener(args.outFile, "wt")
else:
    outFile = sys.stdout

###################################################
# Extract each scaffold from the geno file, and the genes for each scaffold,
# and write them out.
for scaffold in geneData:
    mRNAs = geneData[scaffold].keys()
    geneCount = str(len(mRNAs))
    sys.stderr.write("Extracting " + geneCount + " gene sequences from " + scaffold + "\n")
dest='split', action='store_true') parser.add_argument('--no-split', help="Do not split sequences", dest='split', action='store_false') parser.set_defaults(split=True) args = parser.parse_args() #args = parser.parse_args(["--gff", "/scratch/shm45/Hmel2/Hmel2.cortex.gff", "-g", "/zoo/disk1/shm45/vcf/rosina/ros10.Hmel2.bwa.default.HC.DP8.Hmel215006.geno.gz"]) with gzip.open(args.gff, "r") if args.gff.endswith(".gz") else open( args.gff, "r") as gff: gffLines = gff.readlines() geneData = genomics.parseGenes(gffLines) if not args.outFile: outFile = sys.stdout else: outFile = gzip.open(args.outFile, "w") if args.outFile.endswith(".gz") else open( args.outFile, "w") ################################################### #extract each scaffold from the geno file, and the genes for each scaffold and write them out for scaffold in geneData.keys(): mRNAs = geneData[scaffold].keys() sys.stderr.write("Extracting " + str(len(mRNAs)) + " gene sequences from " + scaffold + "\n")
parser.add_argument( "--ignoreConflicts", help= "Don't fail if two annotations give conflicting information about the same site", action='store_true') args = parser.parse_args() ################################################################################ #get gene data sys.stderr.write("Parsing annotation\n") with gzip.open(args.annotation, "rt") if args.annotation.endswith(".gz") else open( args.annotation, "rt") as ann: geneData = genomics.parseGenes(ann.readlines(), fmt=args.format) #get scaffold names sys.stderr.write("Loading reference genome\n") with gzip.open(args.ref, "rt") if args.ref.endswith(".gz") else open( args.ref, "rt") as ref: scaffolds, _sequences_ = genomics.parseFasta(ref.read(), makeUppercase=True) sequences = {} for i, scaffold in enumerate(scaffolds): sequences[scaffold] = _sequences_[i] #open output if not args.outFile: outFile = sys.stdout else: outFile = gzip.open(args.outFile,