Esempio n. 1
0
    for file in file_list:
        base_file_name = os.path.splitext(file)[0]
        annotation_input_file = base_file_name + '_HGVS.temp'
        output_file = base_file_name + '.vcf'

        # Clean LOVD data for VCF
        lovd_file = pd.read_csv(file, sep=COLUMN_DELIMITER)
        lovd_file = vcf.remove_malformed_fields(lovd_file)

        # Output VCF variants for annotation
        column_list = [
            'dna_change', 'protein_change', 'var_pub_as', 'rna_change',
            'db_id', 'variant_remarks', 'reference', 'frequency'
        ]

        vcf_format = vcf.convert_to_vcf_format(lovd_file[column_list], rm,
                                               'dna_change', 'LOVD')
        vcf_column_order = [
            'CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO'
        ]

        with open(annotation_input_file, 'w') as f:
            vcf_header = [
                '##fileformat=VCFv4.0',
                vcf.get_vcf_info_header(lovd_file[column_list], 'LOVD',
                                        'Data from LOVD'),
                '#' + '\t'.join(vcf_column_order)
            ]
            f.write('\n'.join(vcf_header) + '\n')

            vcf_format[vcf_column_order].to_csv(f,
                                                sep=COLUMN_DELIMITER,
    rm = VariantRemapper()

    for file in file_list:
        base_file_name = os.path.splitext(file)[0]
        annotation_input_file = base_file_name + '_HGVS.temp'
        output_file = base_file_name + '.vcf'

        # Clean LOVD data for VCF
        lovd_file = pd.read_csv(file, sep=COLUMN_DELIMITER)
        lovd_file = vcf.remove_malformed_fields(lovd_file)

        # Output VCF variants for annotation
        column_list = ['dna_change', 'protein_change', 'var_pub_as', 'rna_change', 'db_id', 'variant_remarks', 'reference', 'frequency']


        vcf_format = vcf.convert_to_vcf_format(lovd_file[column_list], rm, 'dna_change', 'LOVD')
        vcf_column_order = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']

        with open(annotation_input_file, 'w') as f:
            vcf_header = ['##fileformat=VCFv4.0',
                          vcf.get_vcf_info_header(lovd_file[column_list], 'LOVD', 'Data from LOVD'),
                          '#' + '\t'.join(vcf_column_order)
            ]
            f.write('\n'.join(vcf_header) + '\n')

            vcf_format[vcf_column_order].to_csv(f, sep=COLUMN_DELIMITER, header=False, index=False)

        # Annotate with VEP
        annotate_vcf.annotate_vep(annotation_input_file, output_file)

        # Combine VEP VCF output with original LOVD data