Ejemplo n.º 1
0
    def test_preferred_transcripts_false(self):
        """
        Check that preferred transcripts are labelled correctly.
        Not in preferred transcripts - should be false.

        Applies 'test/PreferredTranscripts.txt' to self.report with 'low'
        strictness, then checks every report row whose second column is
        '3:41265953CT>C'. The test fails if no such row exists, so it
        cannot pass without exercising its assertion.
        """
        # apply preferred transcripts
        self.pt = preferred_transcripts()
        self.pt.load(os.path.abspath('test/PreferredTranscripts.txt'))
        self.pt.apply(self.report, 'low')

        # check in report, counting matching rows so that an absent variant
        # is reported as a failure rather than silently passing
        rows_checked = 0
        with open(self.report.report_path) as report:
            reader = csv.reader(report, delimiter='\t')
            for line in reader:
                if line[1] == '3:41265953CT>C':
                    # NOTE(review): the other preferred-transcript tests
                    # assert on line[3]; confirm column 2 is intended here.
                    self.assertEqual(line[2], 'False')
                    rows_checked += 1

        self.assertTrue(
            rows_checked > 0,
            'variant 3:41265953CT>C not found in report')
Ejemplo n.º 2
0
    def test_preferred_transcripts_low_strictness_true_1(self):
        """
        Check that preferred transcripts are labelled correctly.
        NM_001007553 should be true with high and low strictness

        Applies 'test/PreferredTranscripts.txt' to self.report with 'low'
        strictness, then checks every report row for variant
        '1:115256669G>A' whose column 29 mentions NM_001007553. The test
        fails if no such row exists, so it cannot pass without exercising
        its assertion.
        """
        # apply preferred transcripts
        self.pt = preferred_transcripts()
        self.pt.load(os.path.abspath('test/PreferredTranscripts.txt'))
        self.pt.apply(self.report, 'low')

        # check in report, counting matching rows so that a missing
        # variant/transcript is reported as a failure
        rows_checked = 0
        with open(self.report.report_path) as report:
            reader = csv.reader(report, delimiter='\t')
            for line in reader:
                if line[1] == '1:115256669G>A' and 'NM_001007553' in line[29]:
                    self.assertEqual(line[3], 'True')
                    rows_checked += 1

        self.assertTrue(
            rows_checked > 0,
            'no report row found for 1:115256669G>A / NM_001007553')
Ejemplo n.º 3
0
    def test_preferred_transcripts_low_strictness_true_2(self):
        """
        Check that preferred transcripts are labelled correctly.
        XM_005245221 should be false with high strictness and true with
        low strictness.

        Applies 'test/PreferredTranscripts.txt' to self.report with 'low'
        strictness, then checks every report row for variant
        '1:162681151T>G' whose column 29 mentions XM_005245221. The test
        fails if no such row exists, so it cannot pass without exercising
        its assertion.
        """
        # apply preferred transcripts
        self.pt = preferred_transcripts()
        self.pt.load(os.path.abspath('test/PreferredTranscripts.txt'))
        self.pt.apply(self.report, 'low')

        # check in report, counting matching rows so that a missing
        # variant/transcript is reported as a failure
        rows_checked = 0
        with open(self.report.report_path) as report:
            reader = csv.reader(report, delimiter='\t')
            for line in reader:
                if line[1] == '1:162681151T>G' and 'XM_005245221' in line[29]:
                    self.assertEqual(line[3], 'True')
                    rows_checked += 1

        self.assertTrue(
            rows_checked > 0,
            'no report row found for 1:162681151T>G / XM_005245221')
Ejemplo n.º 4
0
def main(args):
    """
    Run the vcf_parse workflow for one set of parsed arguments.

    Steps, in order: attach a DEBUG-level stream handler to the
    'vcf_parse' logger, load the input into a vcf_report, optionally list
    the config headers and exit, optionally load a config, make the
    variant report, then optionally apply preferred transcripts, known
    variants and BED file(s) to it.

    args must expose the attributes read below: input, output,
    config_list, config, filter_non_pass, transcripts,
    transcript_strictness, known_variants, bed and bed_folder.

    Raises SystemExit (no exit code) when args.config_list is truthy.
    """
    # Imported locally so the block does not depend on a module-level
    # `import sys` being present.
    import sys

    # setup logger
    logger = logging.getLogger('vcf_parse')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(levelname)s\t%(asctime)s\t%(name)s\t%(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.info('running vcf_parse.py...')

    # Load arguments, make vcf report object and load data
    report = vcf_report()
    report.load_data(args.input, args.output)

    # If -l flag called, print headers and exit.
    # sys.exit() is used instead of the exit() builtin, which is injected
    # by the `site` module for interactive use and may be unavailable
    # (e.g. `python -S` or frozen builds).
    if args.config_list:
        report.list_config()
        sys.exit()

    # If config file provided, load config
    if args.config:
        report.load_config(args.config)
    else:
        logger.info('no config file found -- outputting all data from VCF.')

    # Make variant report of whole VCF
    report.make_report(args.filter_non_pass)

    # If preferred transcripts provided, apply to variant report
    if args.transcripts:
        pt = preferred_transcripts()
        pt.load(args.transcripts)
        pt.apply(report, args.transcript_strictness)
    else:
        logger.info('no preferred transcripts file provided -- preferred '
                    'transcripts column will all be labelled as "Unknown"')

    # If known variants provided, apply to variant report
    if args.known_variants:
        known = known_variants()
        known.load_known_variants(args.known_variants)
        known.apply_known_variants(report)
    else:
        logger.info('no known variants file provided -- Classification '
                    'column will be empty')

    # If single BED file provided, make variant report with BED file
    # applied
    if args.bed:
        bed = bed_object()
        bed.apply_single(args.bed, report)

    # If folder of BED files provided, make a separate variant report
    # for each BED file. Output will be saved in a folder named the
    # same as the BED file folder, within the output directory.
    elif args.bed_folder:
        bed = bed_object()
        bed.apply_multiple(args.bed_folder, report)

    # If no BED files provided, pass
    else:
        logger.info('no BED files provided')

    # Finish
    logger.info('vcf_parse.py completed\n{}'.format('---' * 30))