Exemple #1
0
# NOTE(review): with nargs="+" a value given on the command line is parsed into
# a list of ints, but the default below is a bare int -- confirm that the code
# consuming args.ploidy handles both shapes.
parser.add_argument(
    "--ploidy",
    action="store",
    type=int,
    nargs="+",
    default=2,
    help="Ploidy for splitting phased sequences",
)
# Default the `split` setting to True.
parser.set_defaults(split=True)

args = parser.parse_args()

# Choose the opener from the file extension; either way the GFF is read in
# text mode and held in memory as a list of lines.
openGff = gzip.open if args.gff.endswith(".gz") else open
with openGff(args.gff, "rt") as gff:
    gffLines = gff.readlines()

sys.stderr.write("Parsing gene data from gff\n")
geneData = genomics.parseGenes(gffLines, targets=args.targets)

# Write to the named file when one is given (gzip-compressed if the name ends
# in ".gz"); otherwise fall back to standard output.
if args.outFile:
    openOut = gzip.open if args.outFile.endswith(".gz") else open
    outFile = openOut(args.outFile, "wt")
else:
    outFile = sys.stdout

###################################################

# Extract each scaffold from the geno file, then extract the genes on that scaffold and write them out.

# Visit every scaffold key in geneData and report on stderr how many entries
# (presumably mRNA IDs -- verify against genomics.parseGenes) it holds.
for scaffold in geneData.keys():
    mRNAs = geneData[scaffold].keys()
    nGenes = str(len(mRNAs))
    sys.stderr.write("Extracting " + nGenes + " gene sequences from " +
                     scaffold + "\n")
Exemple #2
0
                    dest='split',
                    action='store_true')
# --no-split stores False into the shared `split` destination.
parser.add_argument(
    '--no-split',
    dest='split',
    action='store_false',
    help="Do not split sequences",
)
# When no flag touches `split`, it defaults to True.
parser.set_defaults(split=True)

args = parser.parse_args()

# Read the whole GFF into memory as text lines. gzip.open treats a bare "r" as
# binary mode, which would yield bytes for ".gz" input but str for plain
# input; "rt" makes both branches return str lines.
openGff = gzip.open if args.gff.endswith(".gz") else open
with openGff(args.gff, "rt") as gff:
    gffLines = gff.readlines()

geneData = genomics.parseGenes(gffLines)

# Write to the named file when one is given, otherwise to standard output.
# gzip.open treats a bare "w" as binary mode, so str writes that work on
# sys.stdout or a plain file would fail for ".gz" output; "wt" keeps all three
# destinations text-mode.
if args.outFile:
    openOut = gzip.open if args.outFile.endswith(".gz") else open
    outFile = openOut(args.outFile, "wt")
else:
    outFile = sys.stdout

###################################################

# Extract each scaffold from the geno file, then extract the genes on that scaffold and write them out.

# Visit every scaffold key in geneData and report on stderr how many entries
# (presumably mRNA IDs -- verify against genomics.parseGenes) it holds.
for scaffold in geneData.keys():
    mRNAs = geneData[scaffold].keys()
    nGenes = str(len(mRNAs))
    sys.stderr.write("Extracting " + nGenes + " gene sequences from " +
                     scaffold + "\n")
# Boolean switch: False unless --ignoreConflicts is given on the command line.
parser.add_argument(
    "--ignoreConflicts",
    action='store_true',
    help="Don't fail if two annotations give conflicting information about the same site",
)

args = parser.parse_args()

################################################################################

# Parse the annotation file (gzip-compressed when its name ends in ".gz")
# into geneData, using the format given on the command line.
sys.stderr.write("Parsing annotation\n")
openAnnotation = gzip.open if args.annotation.endswith(".gz") else open
with openAnnotation(args.annotation, "rt") as ann:
    geneData = genomics.parseGenes(ann.readlines(), fmt=args.format)

# Load the reference (gzip-compressed when its name ends in ".gz"), upper-cased
# by the parser, and index each sequence by its scaffold name.
sys.stderr.write("Loading reference genome\n")
openRef = gzip.open if args.ref.endswith(".gz") else open
with openRef(args.ref, "rt") as ref:
    scaffolds, _sequences_ = genomics.parseFasta(ref.read(),
                                                 makeUppercase=True)
# Names and sequences come back as parallel sequences; pair them by position.
sequences = {
    name: _sequences_[position]
    for position, name in enumerate(scaffolds)
}

#open output
if not args.outFile: outFile = sys.stdout
else:
    outFile = gzip.open(args.outFile,