help='name of the phylip output file', type=str, required=False) parser.add_argument('-s', '--samples', help='column names of the samples to process (optional)', type=str, required=False) args = parser.parse_args() # check if any option was specified: if not (args.fasta or args.phylip): raise IOError('Either -f or -p options need to be specified.') # check if samples names are given and if all sample names are present in a header sampleNames = calls.checkSampleNames(args.samples, args.input) ############################# program ############################# callsDF = calls.callsParser(args.input, sampleNames) if args.fasta and args.phylip: outputFasta = open(args.fasta, 'w') outputPhy = open(args.phylip, 'w') NumberPos = len(callsDF.positions) NumberSamp = len(sampleNames) outputPhy.write(' %s %s\n' % (NumberSamp, NumberPos)) # make .phy header # write sample name into file for s in callsDF.names:
help= 'Specify the populations in the format "pop1[sample1,sample2];pop2[sample5,sample6]"', type=str, required=True) #parser.add_argument('-p', '--parental', help = 'Specify a parental groups', type=str, required=True) args = parser.parse_args() # check and append population names and samples popNames = args.pop pops = [] for popi in popNames.strip("\"").split(";"): popName = popi.split("[")[0] popSample = re.split("\[|\]", popi)[1] pops.append(popName) vars()[popName + "samples"] = calls.checkSampleNames(popSample, args.input) ############################## program ############################# counter = 0 print('Opening the file...') with open(args.input) as datafile: header_words = datafile.readline().split() # make output header print('Creating the output file...') fileoutput = open(args.output, 'w') popsP = '\t'.join(str(w) for w in pops) fileoutput.write("%s\n" % popsP)
#import collections
import calls  # my custom module

############################# options #############################

# Command-line interface: input/output files, the missing-data threshold
# used to remove sites, and an optional list of sample columns.
parser = calls.CommandLineParser()
parser.add_argument(
    '-i', '--input',
    help='name of the input file',
    type=str,
    required=True)
parser.add_argument(
    '-o', '--output',
    help='name of the output file',
    type=str,
    required=True)
parser.add_argument(
    '-m', '--missing',
    help='missing data threshold to remove sites',
    type=int,
    required=True)
parser.add_argument(
    '-s', '--samples',
    help='column names of the samples to process (optional)',
    type=str,
    required=False)
args = parser.parse_args()

# check if samples names are given and if all sample names are present
# in a header
sampleNames = calls.checkSampleNames(args.samples, args.input)

############################# program #############################

# Running state initialised before the input file is read.
counter = 0
siteNumber = 1
chrPrev = '1'

print('Opening the file...')

with open(args.input) as datafile:
    # The first line of the input is the whitespace-separated header.
    header_line = datafile.readline()
    header_words = header_line.split()

    # index samples
    sampCol = calls.indexSamples(sampleNames, header_words)
'--family', help='Specify the family list in the format \ "family1[sample1,sample2];family2[sample5,sample6]"', type=str, required=True) args = parser.parse_args() # check Family list familyNames = args.family Fsamples = [] famDict = {} for i in familyNames.strip("\"").split(";"): famName = i.split("[")[0] famSample = re.split("\[|\]|", i)[1] Fsamples.append(famSample.split(",")) famDict[famName] = calls.checkSampleNames(famSample, args.input) Fsamples = calls.flattenList(Fsamples) ############################# program ############################# callsDF = calls.callsParser(args.input, Fsamples) outputPED = open(args.output + '.ped', 'w') outputMAP = open(args.output + '.map', 'w') for i in range(len(callsDF.positions)): # make map file snpsID = str(callsDF.chrmosomes[i]) + "_" + str(callsDF.positions[i]) cM = float(callsDF.positions[i]) * 0.000001 outputMAP.write("%s %s %s %s\n" % (callsDF.chrmosomes[i], snpsID, cM, callsDF.positions[i]))
############################# modules #############################

import calls  # my custom module

############################# options #############################

# Command-line interface: the SIFT annotation file, the tab-delimited
# genotype file, the output file, the annotation fields to extract and
# an optional list of sample columns.
parser = calls.CommandLineParser()
parser.add_argument(
    '-a', '--annotation',
    help='name of the SIFT annotation file',
    type=str,
    required=True)
parser.add_argument(
    '-t', '--tab',
    help='tab delimited genotype file',
    type=str,
    required=True)
parser.add_argument(
    '-o', '--output',
    help='name of the output file',
    type=str,
    required=True)
parser.add_argument(
    '-f', '--fields',
    help='annotation fields to extract. Possible options: CHROM, POS, REF_ALLELE, ALT_ALLELE, TRANSCRIPT_ID, GENE_ID, GENE_NAME, REGION, VARIANT_TYPE, REF_AMINO, ALT_AMINO, AMINO_POS, SIFT_SCORE, SIFT_MEDIAN, NUM_SEQS, dbSNP, SIFT_PREDICTION',
    type=str,
    required=True)
parser.add_argument(
    '-s', '--samples',
    help='column names of the samples to process (optional)',
    type=str,
    required=False)
args = parser.parse_args()

# check if samples names are given and if all sample names are present
# in a header
sampleNames = calls.checkSampleNames(args.samples, args.tab)

############################# program #############################

print('Opening the file...')

counter = 0

siftFile = open(args.annotation, 'r')

# First annotation line: whitespace-separated column names, used to
# locate the comma-separated fields requested with -f.
annotOptions = siftFile.readline().split()
fieldsNames = args.fields.split(',')
fieldsIndex = calls.indexSamples(fieldsNames, annotOptions)

# Second annotation line: the first record. Its first column is split
# on '_' and the second piece is read as the integer chromosome; its
# second column is the integer position.
sift_words = siftFile.readline().split()
sift_chr = int(sift_words[0].split('_')[1])
sift_pos = int(sift_words[1])
'-s', '--samples', help= 'column names of the samples to make a reference for. Specify in the format "group1[sample1,sample2];group2[sample3,sample4,sample5]"', type=str, required=True) args = parser.parse_args() # check and append group names and samples groupNames = args.samples groups = [] for groupi in groupNames.strip("\"").split(";"): groupNames = groupi.split("[")[0] groupSample = re.split("\[|\]", groupi)[1] groups.append(groupNames) vars()[groupNames + "samples"] = calls.checkSampleNames( groupSample, args.callsFile) ############################ script ############################## fileoutput = open(args.output, 'w') fastaRef = FastaParser(args.fastaReference) # to track lines in the input files fastaNum = 0 CHRprev = 0 CHRnum = 0 fastaNumAdd = 0 print('Opening the file...') with open(args.callsFile) as callsFile:
import calls  # my custom module
import re

############################# options #############################

# Command-line interface: input/output files plus the two sample groups
# (the "sample" group and the "parental" group).
parser = calls.CommandLineParser()
parser.add_argument(
    '-i', '--input',
    help='name of the input file',
    type=str,
    required=True)
parser.add_argument(
    '-o', '--output',
    help='name of the output file',
    type=str,
    required=True)
parser.add_argument(
    '-s', '--sample',
    help='Specify the sample group',
    type=str,
    required=True)
parser.add_argument(
    '-p', '--parental',
    help='Specify a parental groups',
    type=str,
    required=True)
args = parser.parse_args()

# check if samples names are given and if all sample names are present
# in a header
sNames = calls.checkSampleNames(args.sample, args.input)
pNames = calls.checkSampleNames(args.parental, args.input)

############################# program #############################

counter = 0

print('Opening the file...')

with open(args.input) as datafile:
    # The first line of the input is the whitespace-separated header.
    header_words = datafile.readline().split()

    # index samples
    sIndex = calls.indexSamples(sNames, header_words)
    pIndex = calls.indexSamples(pNames, header_words)
'--family', help='Specify the family list in the format \ "family1[sample1,sample2];family2[sample5,sample6]"', type=str, required=True) args = parser.parse_args() print "Checking sample names ..." familyNames = args.family Fsamples = [] familySamples = {} for i in familyNames.strip("\"").split(";"): famName = i.split("[")[0] famSample = re.split("\[|\]|", i)[1] Fsamples.append(famSample.split(",")) familySamples[famName] = calls.checkSampleNames(famSample, args.input) samples = calls.flattenList(Fsamples) calls.checkSampleNames('ANC', args.input) calls.checkSampleNames('DER', args.input) ############################# program ############################# outputSNPs = open(args.output + '.snps', 'w') with open(args.input) as datafile: header_words = datafile.readline().split() sampleIndex = calls.indexSamples(samples, header_words) ANCindex = calls.indexSamples(['ANC'], header_words) DERindex = calls.indexSamples(['DER'], header_words) FamilyIndex = {}