Example #1
0
File: ui.py Project: xflicsu/immuno
def create_variants(file, patient_id):
    """
    Persist an uploaded variant file and build a Variant for each of its rows.

    The upload is written into app.config['UPLOAD_FOLDER'] under the name
    produced by secure_filename(file.filename), then parsed with
    load_variants.

    Parameters
    ----------
    file : object exposing `.filename` and `.save(path)`
        Presumably a Flask/werkzeug upload object -- confirm against callers.
    patient_id :
        Passed through unchanged to every Variant that is created.

    Returns
    -------
    list of Variant, one per row of the loaded table, in row order.
    """
    safe_name = secure_filename(file.filename)
    saved_path = join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(saved_path)

    variant_table = load_variants(saved_path)
    # Only these four columns of each row are read.
    return [
        Variant(
            patient_id=patient_id,
            chr=record['chr'],
            pos=record['pos'],
            ref=record['ref'],
            alt=record['alt'])
        for _, record in variant_table.iterrows()
    ]
Example #2
0
def find_mutation_files(
        input_files, combined_maf=False, max_peptide_length=31):
    """
    Load variants from every .vcf/.maf path in the `input_files` list.

    Returns a dictionary mapping patient IDs to DataFrames containing basic
    variant information (chr, pos, ref, alt). The patient IDs will be each
    filename without its extension, unless the argument combined_maf is True.
    In this case, patient IDs are derived from the tumor barcode column in
    each MAF file.

    Parameters
    ----------
    input_files : iterable of str
        Paths to inspect. Paths whose extension is not in
        MUTATION_FILE_EXTENSIONS are skipped.
    combined_maf : bool
        If True, a MAF file is split by its 'Tumor_Sample_Barcode' column
        and each group becomes a separate patient.
    max_peptide_length : int
        Not used by this function; kept so existing callers keep working.

    Notes
    -----
    Every patient ID is truncated to its first three '-'-separated fields.
    If the truncated ID was already seen, the earlier DataFrame is kept and
    a warning is logged.

    If `args.debug_patient_id` is set (`args` is expected to be a
    module-level object -- confirm), only that patient is returned; a
    KeyError is raised if that patient was not found among the inputs.
    """
    mutation_files = OrderedDict()

    # Iterate over the actual parameter; the previous body referenced an
    # undefined name (`input_filenames`).
    for path in input_files:
        _, filename = split(path)
        base, ext = splitext(filename)
        if ext not in MUTATION_FILE_EXTENSIONS:
            continue

        if ext.endswith('maf') and combined_maf:
            maf_df = load_maf(path)
            file_patients = {}
            # Group by the scalar column name (not a one-element list) so the
            # group key is the barcode itself on every pandas version.
            for barcode, group_df in maf_df.groupby('Tumor_Sample_Barcode'):
                file_patients[get_patient_id(barcode)] = maf_to_vcf(group_df)
        else:
            file_patients = {get_patient_id(base): load_variants(path)}

        # .items() works on both Python 2 and 3 (.iteritems() is 2-only).
        for patient_id, vcf_df in file_patients.items():
            # Keep only the first three dash-separated fields of the ID.
            patient_id = "-".join(patient_id.split("-")[:3])
            if patient_id in mutation_files:
                logging.warning(
                    "Already processed patient %s before file %s",
                    patient_id,
                    path)
            else:
                mutation_files[patient_id] = vcf_df

    if args.debug_patient_id:
        patient_id = args.debug_patient_id
        # Preserve the OrderedDict return type when filtering for debugging.
        mutation_files = OrderedDict(
            [(patient_id, mutation_files[patient_id])])
    return mutation_files
Example #3
0
def find_mutation_files(input_files,
                        combined_maf=False,
                        max_peptide_length=31):
    """
    Load variants from every .vcf/.maf path in the `input_files` list.

    Returns a dictionary mapping patient IDs to DataFrames containing basic
    variant information (chr, pos, ref, alt). The patient IDs will be each
    filename without its extension, unless the argument combined_maf is True.
    In this case, patient IDs are derived from the tumor barcode column in
    each MAF file.

    Arguments:
        input_files: iterable of file paths. Any path whose extension is
            not in MUTATION_FILE_EXTENSIONS is ignored.
        combined_maf: when True, each MAF file is split on its
            'Tumor_Sample_Barcode' column, one patient per barcode group.
        max_peptide_length: unused here; retained for caller compatibility.

    Patient IDs are reduced to their first three '-'-separated fields before
    being stored. When two inputs map to the same reduced ID, the first one
    wins and a warning is logged for the later one.

    When `args.debug_patient_id` is truthy (`args` is presumably a
    module-level namespace -- verify), the result is narrowed to that single
    patient; KeyError is raised if that patient was never loaded.
    """
    mutation_files = OrderedDict()

    # The parameter is `input_files`; the old loop used the undefined name
    # `input_filenames`.
    for path in input_files:
        _, filename = split(path)
        base, ext = splitext(filename)
        if ext in MUTATION_FILE_EXTENSIONS:
            if ext.endswith('maf') and combined_maf:
                maf_df = load_maf(path)
                file_patients = {}
                # A scalar grouping key yields scalar group labels on all
                # pandas versions; a one-element list yields 1-tuples on
                # pandas >= 2.0.
                grouped = maf_df.groupby('Tumor_Sample_Barcode')
                for barcode, group_df in grouped:
                    vcf_df = maf_to_vcf(group_df)
                    patient_id = get_patient_id(barcode)
                    file_patients[patient_id] = vcf_df
            else:
                patient_id = get_patient_id(base)
                vcf_df = load_variants(path)
                file_patients = {patient_id: vcf_df}

            # dict.items() exists on Python 2 and 3; iteritems() does not.
            for patient_id, vcf_df in file_patients.items():
                # Truncate to the first three dash-separated fields.
                patient_id = "-".join(patient_id.split("-")[:3])
                if patient_id in mutation_files:
                    logging.warning(
                        "Already processed patient %s before file %s",
                        patient_id, path)
                else:
                    mutation_files[patient_id] = vcf_df

    if args.debug_patient_id:
        patient_id = args.debug_patient_id
        # Keep returning an OrderedDict rather than downgrading to dict.
        mutation_files = OrderedDict(
            [(patient_id, mutation_files[patient_id])])
    return mutation_files