Example #1
0
def main():
    """Parse the command-line options and run the requested VCF operation.

    Exactly one branch runs per invocation, checked in this order:

    1. ``--list_chromosomes``: read the files and list their chromosomes.
    2. filter/merge (decided by the ``filter_merge`` value returned from
       ``process_arguments``): read the files, then filter when ``--filter``
       was given, otherwise merge.
    3. ``--subset``: read the files and subset on ``--chromosome``.
    4. otherwise: read the files for the resolved chromosome and run the
       homozygous and/or phasing tests that were requested.

    Raises:
        ValueError: if ``--subset`` is given without ``--chromosome``.
    """
    parser = argparse.ArgumentParser()

    # Required option. The dash-prefixed names make argparse treat this as an
    # option rather than a positional argument; `required=True` makes it
    # mandatory.
    parser.add_argument('-d',
                        '--directory',
                        '--dir',
                        help='parent directory of the samples i.e. Sample_054',
                        required=True)

    # Optional flags selecting the operation to run.
    # The help text is built from adjacent string literals, so every piece
    # must carry its own trailing space.
    parser.add_argument(
        '-f',
        '--filter',
        action='store_true',
        help=
        'use argument to keep the following columns: CHROM, POS, REF, ALT, genotype info. \n'
        'Note: if option selection, the -d can either be the vcf.gz or the parent directory. '
        'If vcf.gz, it will perform filtering on that file. Else, it will perform filtering on '
        'all the files in the hierarchical directory.')
    parser.add_argument(
        '-m',
        '--merge',
        action='store_true',
        help=
        'use argument to merge all filtered.vcf.gz files in the parent directory'
    )
    parser.add_argument(
        '-o',
        '--output',
        help='directory where to output the filtered or merged files')
    parser.add_argument('-ht',
                        '--homozygous_test',
                        action='store_true',
                        help='use argument to collect homozygous statistics')
    parser.add_argument(
        '-s',
        '--subset',
        action='store_true',
        help='use argument to subset the vcf file based on chromosomes')

    # Optional values refining the subset / test operations.
    parser.add_argument(
        '-c',
        '--chromosome',
        help=
        'use argument to select the chromosome number on which to subset on')
    parser.add_argument(
        '-n',
        '--number_sites',
        help='use argument to select the number of line on which to subset on',
        type=int)
    parser.add_argument('-p',
                        '--phase',
                        action='store_true',
                        help='use argument to select the phasing test')
    parser.add_argument('-lc',
                        '--list_chromosomes',
                        action='store_true',
                        help='list all the chromosomes in a vcf.gz '
                        'file')
    args = parser.parse_args()

    # process_arguments is defined elsewhere; it receives the raw option
    # values plus the parser and returns the four values used below.
    working_directory, output_directory, chromosome, filter_merge = process_arguments(
        c_directory=args.directory,
        o_directory=args.output,
        filter_flag=args.filter,
        merge_flag=args.merge,
        chrom=args.chromosome,
        arg_parser=parser)

    # create VCF object
    vcf = VCF()

    if args.list_chromosomes:
        vcf.read_files(c_dir=working_directory, vcf_filtered_file=False)
        vcf.list_chrom(output_dir=output_directory)

    # filtering and merging have to read all the vcf files in the
    # subdirectories of the families
    elif filter_merge:
        vcf.read_files(c_dir=working_directory)
        # --filter takes precedence; merging is the fallback in this branch
        if args.filter:
            vcf.filter(output_dir=output_directory)
        else:
            vcf.merge(output_dir=output_directory)

    # subset, homozygous or phasing tests have to read a vcf file in the
    # parent directory
    else:
        if args.subset:
            # a chromosome is required here because the file is subsetted on
            # it; note that the raw option value (args.chromosome) is used,
            # and it is not forwarded to read_files
            if not args.chromosome:
                raise ValueError(
                    'Chromosome number must be provided in order to perform subset'
                )

            vcf.read_files(c_dir=working_directory, vcf_filtered_file=True)
            vcf.subset(chrom=args.chromosome,
                       output_dir=output_directory,
                       n_sites=args.number_sites)

        else:
            # the tests use the chromosome value returned by
            # process_arguments, both for reading and for testing
            vcf.read_files(c_dir=working_directory,
                           vcf_filtered_file=True,
                           chrom=chromosome)

            if args.homozygous_test:
                # collect homozygous statistics
                vcf.tests(output_dir=output_directory,
                          chrom=chromosome,
                          homozygous_test=True)

            if args.phase:
                vcf.tmp_test(output_dir=output_directory, chrom=chromosome)
Example #2
0
# Header row of the indel exonic-function table, written tab-separated.
# `indel_exonicfunc` is defined outside this fragment; presumably an open,
# writable text file -- confirm against the code that creates it.
title_indel_exonicfunc = [
    'Sample',
    'frameshift_deletion',
    'frameshift_insertion',
    'nonframeshift_deletion',
    'nonframeshift_insertion',
    'stoploss',
    'stopgain',
    'unknown',
]
indel_exonicfunc.write('\t'.join(title_indel_exonicfunc) + '\n')

# `files` names a text file that lists one VCF path per line. The context
# manager closes the handle once the loop ends (the bare open() never did).
with open(files, 'r') as sample_list:
    for file in sample_list:
        file = file.strip()
        # Skip '#' comment lines, and blank lines, which would otherwise be
        # handed to VCF() as an empty path.
        if not file or file.startswith('#'):
            continue

        # Sample name = file name up to its first '.'
        sample_name = os.path.basename(file).split('.')[0]

        myVCF = VCF(file)
        snp = myVCF.filter()
        # NOTE(review): replace() rewrites every 'snp' in the path, including
        # directory components -- confirm that is intended.
        indel_file = file.replace('snp', 'indel')
        myVCF = VCF(indel_file)
        indel = myVCF.filter()

        # Disabled per-chromosome report (chr.xls), kept for reference:
        #
        # chr = myVCF.chr_stat(vcf)
        # chromosome.write(sample_name)
        # for i in [str(i) for i in range(1,23)]+['X','Y']:
        #     try:
        #         chromosome.write('\t'+str(chr[i]))
        #     except:
        #         chromosome.write('\t0')
        # chromosome.write('\n')
	"""
Example #3
0
## indel_exonicfunc.xls
# Column titles for the table; the header row is written tab-separated.
# `indel_exonicfunc` comes from outside this fragment; presumably an open,
# writable text file -- confirm against the code that creates it.
title_indel_exonicfunc = [
    'Sample', 'frameshift_deletion', 'frameshift_insertion',
    'nonframeshift_deletion', 'nonframeshift_insertion', 'stoploss',
    'stopgain', 'unknown'
]
indel_exonicfunc.write('\t'.join(title_indel_exonicfunc) + '\n')

# `files` names a text file with one VCF path per line. Reading it through a
# context manager makes sure the handle is closed when the loop finishes.
with open(files, 'r') as sample_list:
    for file in sample_list:
        file = file.strip()
        # Ignore blank lines (they would reach VCF() as an empty path) and
        # '#' comment lines.
        if not file or file.startswith('#'):
            continue

        # The sample name is the file name truncated at its first '.'
        sample_name = os.path.basename(file)
        sample_name = sample_name.split('.')[0]

        myVCF = VCF(file)
        snp = myVCF.filter()
        # NOTE(review): every 'snp' substring of the path is replaced, not
        # only the one in the file name -- confirm that is intended.
        indel_file = file.replace('snp', 'indel')
        myVCF = VCF(indel_file)
        indel = myVCF.filter()

        # Disabled per-chromosome report (chr.xls), kept for reference:
        #
        # chr = myVCF.chr_stat(vcf)
        # chromosome.write(sample_name)
        # for i in [str(i) for i in range(1,23)]+['X','Y']:
        #     try:
        #         chromosome.write('\t'+str(chr[i]))
        #     except:
        #         chromosome.write('\t0')
        # chromosome.write('\n')
	"""