Esempio n. 1
0
    def _add_family_info(self, project_id, family_id, individuals):
        """
        Register a new family and prepare the collection its variants go into.

        Every individual in `individuals` that is not yet known for the
        project is added first. The per-family collection, named from the
        slugified project and family IDs, is then indexed, and a family record
        with status 'loading' is saved to the `families` collection.

        Raises an Exception if the family already exists.
        """
        if self.family_exists(project_id, family_id):
            raise Exception("Family (%s, %s) already exists" % (project_id, family_id))

        for indiv_id in individuals:
            if self.individual_exists(project_id, indiv_id):
                continue
            self.add_individual(project_id, indiv_id)

        project_slug = slugify(project_id, separator='_')
        family_slug = slugify(family_id, separator='_')
        collection_name = "family_%s_%s" % (project_slug, family_slug)

        family_record = {
            'project_id': project_id,
            'family_id': family_id,
            'individuals': individuals,
            'coll_name': collection_name,
            'status': 'loading'
        }

        # the database handle is looked up on settings by the name stored on self
        self._index_family_collection(getattr(settings, self._db_name)[collection_name])

        getattr(settings, self._db_name).families.save(family_record)
Esempio n. 2
0
    def _add_family_info(self, project_id, family_id, individuals):
        """
        Record the bookkeeping entry for a family so its variants can be loaded.

        Does nothing when the family is already registered. Otherwise every
        individual not yet known for the project is added, the per-family
        collection (named from the slugified project and family IDs) is
        indexed, and a family record with status 'loading' is saved to the
        `families` collection.
        """
        if self.family_exists(project_id, family_id):
            # an already-registered family is left untouched instead of being
            # treated as an error
            return

        for indiv_id in individuals:
            if self.individual_exists(project_id, indiv_id):
                continue
            self.add_individual(project_id, indiv_id)

        project_slug = slugify(project_id, separator='_')
        family_slug = slugify(family_id, separator='_')
        collection_name = "family_%s_%s" % (project_slug, family_slug)

        family_record = {
            'project_id': project_id,
            'family_id': family_id,
            'individuals': individuals,
            'coll_name': collection_name,
            'status': 'loading'
        }

        self._index_family_collection(self._db[collection_name])

        self._db.families.save(family_record)
Esempio n. 3
0
def write_xl_rows_to_ped(ped_filename, xl_rows):
    """Write spreadsheet rows out as a tab-separated ped file.

    Args:
        ped_filename: path of the file to create (overwritten if present)
        xl_rows: a list of tuples with at least 6 elements each; the first 6
            are family_id, sample_id, paternal_id, maternal_id, sex, affected

    Rows whose cells are all empty are skipped. Sample and parent IDs are
    slugified; missing parents and a missing sex are written as '.', and a
    missing (None) affected status as '-9'.
    """
    known_codes = ("1", "2")
    sex_codes = {'M': '1', 'F': '2'}
    affected_codes = {
        'u': '1',
        'unaffected': '1',
        'no': '1',
        'a': '2',
        'affected': '2',
        'yes': '2'
    }

    with open(ped_filename, 'w') as out:
        for row_number, row in enumerate(xl_rows):
            assert len(row) >= 6, (
                "Unexpected number of columns in row #%(i)s: %(row)s"
                % {'i': row_number, 'row': row})

            if not any(row):
                # nothing but empty cells in this row
                continue

            print("%s: %s" % (row_number, row))

            family_id, sample_id, paternal_id, maternal_id, sex, affected = row[0:6]
            sample_id = slugify(sample_id, replace_dot=True)
            paternal_id = slugify(paternal_id, replace_dot=True)
            maternal_id = slugify(maternal_id, replace_dot=True)

            assert family_id and sample_id, (
                "family_id or sample_id not specified in row: %(row)s"
                % {'row': row})

            # '.' stands in for a parent that is not given
            paternal_id = paternal_id or '.'
            maternal_id = maternal_id or '.'

            if not sex:
                sex = '.'
            elif sex not in known_codes:
                # anything else is decided by its first letter
                sex = sex_codes[sex[0].upper()]

            if affected is None:
                affected = '-9'
            elif affected not in known_codes:
                affected = affected_codes[affected.strip().lower()]

            columns = [
                family_id, sample_id, paternal_id, maternal_id, sex, affected
            ]
            out.write('\t'.join(columns) + '\n')
Esempio n. 4
0
    def handle(self, *args, **options):
        """
        Replace lower-cased family ID slugs of a project with the current slug form.

        Positional args:
            args[0]: project_id of the Project whose families are updated
            args[1]: path of a text file holding one raw family ID per line

        For every raw ID, the family whose family_id equals the lower-cased
        slug is looked up; when found, its family_id is overwritten with the
        slug exactly as slugify returns it (no explicit lower-casing) and the
        family is saved. Raw IDs without a matching family are skipped.
        """
        project_id = args[0]
        project = Project.objects.get(project_id=project_id)

        # read all IDs up front so the file handle is closed before any DB work
        with open(args[1]) as raw_id_file:
            raw_family_ids = [line.strip('\n') for line in raw_id_file]

        for raw_id in raw_family_ids:
            new_slugified_id = slugify(raw_id, separator='_')
            old_slugified_id = new_slugified_id.lower()
            # a single get() replaces the exists()-then-get() pair: one query
            # instead of two, with the same outcome for zero, one or many matches
            try:
                family = Family.objects.get(project=project, family_id=old_slugified_id)
            except Family.DoesNotExist:
                continue
            family.family_id = new_slugified_id  # set family ID to new slug repr
            family.save()
Esempio n. 5
0
def set_genotypes_from_vcf_fields(vcf_fields, variant, alt_allele_pos, vcf_header_fields, genotype_meta=True, indivs_to_include=None, vcf_id_map=None):
    """
    Parse the per-sample columns of one VCF row into variant.genotypes.

    vcf_fields: the fields of one VCF data row
    vcf_header_fields: the headers of the VCF, one per column
        (with the # stripped of the #CHROM in the first column)
    indivs_to_include: optional individual IDs; when given and non-empty,
        every other sample column is skipped
    vcf_id_map: dict of [ID in the VCF file] -> [Individual ID]

    Sample columns start at index 9. Returns the same variant object, with
    its genotypes attribute replaced by a dict keyed by individual ID.
    Raises an Exception when the row and the header differ in length.
    """
    num_columns = len(vcf_fields)
    if len(vcf_header_fields) != num_columns:
        raise Exception("Wrong number of columns")

    parsed_genotypes = {}
    format_str = vcf_fields[8]
    allele_position_map = get_allele_position_map(vcf_fields[3], vcf_fields[4])
    vcf_filter = vcf_fields[6].lower()

    # position of each supported key inside the colon-separated FORMAT string
    formats = {
        key.lower(): position
        for position, key in enumerate(format_str.split(':'))
        if key in ('AD', 'DP', 'GQ', 'PL')
    }

    if indivs_to_include:
        indivs_to_include = [
            slugify(included_id, separator='_', replace_dot=True)
            for included_id in indivs_to_include
        ]

    for col_index in range(9, num_columns):
        vcf_id = slugify(vcf_header_fields[col_index], separator='_', replace_dot=True)
        # fall back to the VCF ID itself when no mapping applies
        indiv_id = vcf_id_map.get(vcf_id, vcf_id) if vcf_id_map else vcf_id

        if indivs_to_include and indiv_id not in indivs_to_include:
            continue

        geno_str = vcf_fields[col_index]
        try:
            if not genotype_meta:
                raise Exception("genotypes without meta not implemented - need to add kwarg")
            parsed_genotypes[indiv_id] = get_genotype_from_str(
                geno_str, formats, alt_allele_pos, allele_position_map,
                vcf_filter=vcf_filter)
        except:
            # report what was being parsed, then let the error propagate
            sys.stdout.write("Could not parse genotype from string: %s with format: %s. Allele_position_map: %s" % (geno_str, format_str, allele_position_map))
            raise

    variant.genotypes = parsed_genotypes

    return variant
Esempio n. 6
0
def write_xl_rows_to_ped(ped_filename, xl_rows):
    """Write spreadsheet rows out as a tab-separated ped file.

    Args:
        ped_filename: path of the file to create (overwritten if present)
        xl_rows: a list of tuples with at least 6 elements each; the first 6
            are family_id, sample_id, paternal_id, maternal_id, sex, affected

    Rows whose cells are all empty are skipped. Sample and parent IDs are
    slugified and only the part of family_id before the first '-' is kept.
    Missing parents and a missing sex are written as '.', and a missing
    (None) affected status as '-9'.

    Raises:
        ValueError: if the affected value is not recognised
    """
    known_codes = ("1", "2")
    sex_codes = {'M': '1', 'F': '2', 'U': '0', '?': '0'}

    with open(ped_filename, 'w') as out:
        for row_number, row in enumerate(xl_rows):
            assert len(row) >= 6, (
                "Unexpected number of columns in row #%(i)s: %(row)s"
                % {'i': row_number, 'row': row})

            if not any(row):
                # nothing but empty cells in this row
                continue

            print("%s: %s" % (row_number, row))

            family_id, sample_id, paternal_id, maternal_id, sex, affected = row[0:6]
            sample_id = slugify(sample_id, replace_dot=True)
            paternal_id = slugify(paternal_id, replace_dot=True)
            maternal_id = slugify(maternal_id, replace_dot=True)

            family_id = family_id.split('-')[0]
            assert family_id, "family_id not specified in row: %(row)s" % {'row': row}
            assert sample_id, "sample_id not specified in row: %(row)s" % {'row': row}

            # '.' stands in for a parent that is not given
            paternal_id = paternal_id or '.'
            maternal_id = maternal_id or '.'

            if not sex:
                sex = '.'
            elif sex not in known_codes:
                # anything else is decided by its first letter
                sex = sex_codes[sex[0].upper()]

            if affected is None:
                affected = '-9'
            else:
                affected = affected.lower()
                if affected not in known_codes:
                    trimmed = affected.strip().lower()
                    if affected == "no" or trimmed.startswith("u"):
                        affected = '1'
                    elif affected == "yes" or trimmed.startswith("a"):
                        affected = '2'
                    else:
                        raise ValueError("Unexpected value for affected: " + affected)

            columns = [family_id, sample_id, paternal_id, maternal_id, sex, affected]
            out.write('\t'.join(columns) + '\n')
Esempio n. 7
0
    def handle(self, *args, **options):
        """
        Migrate family IDs of a project from the lower-cased slug to the current slug.

        Positional args:
            args[0]: project_id of the Project whose families are updated
            args[1]: path of a text file holding one raw family ID per line

        A family is matched by the lower-cased slug of the raw ID; a matching
        family gets the slug exactly as slugify returns it (no explicit
        lower-casing) as its new family_id and is saved. Raw IDs that match
        no family are ignored.
        """
        project_id = args[0]
        project = Project.objects.get(project_id=project_id)

        # the context manager closes the ID file as soon as it has been read
        with open(args[1]) as id_file:
            raw_family_ids = [line.strip('\n') for line in id_file]

        for raw_id in raw_family_ids:
            current_slug = slugify(raw_id, separator='_')
            old_slugified_id = current_slug.lower()
            # one get() instead of exists() followed by get(): a missing family
            # is skipped, anything else behaves as before
            try:
                family = Family.objects.get(project=project,
                                            family_id=old_slugified_id)
            except Family.DoesNotExist:
                continue
            family.family_id = current_slug  # set family ID to new slug repr
            family.save()
Esempio n. 8
0
def get_individuals_from_fam_file(fam_file, project_id='.'):
    """
    Returns a list of individuals from a FAM file

    Args:
        fam_file: iterable of text lines (e.g. an open file). A data line
            holds at least 6 tab-separated fields: family ID, individual ID,
            paternal ID, maternal ID, sex, affected status
        project_id: passed on to every Individual that is created

    Returns:
        list of Individual objects, one per data line, in input order

    Raises:
        ValueError: if a data line cannot be parsed

    Blank lines and lines starting with '#' are ignored.
    """
    individuals = []

    for line in fam_file:
        # bound before the try block so the error message below can always be
        # formatted and never shows the fields of an earlier line
        fields = None
        try:
            # ignore these rows; lines read from a file keep their trailing
            # newline, so test the stripped line instead of comparing with ''
            if not line.strip() or line.startswith('#'):
                continue

            fields = line.strip('\n').split('\t')

            indiv_id = slugify(fields[1], separator='_', replace_dot=True)
            family_id = slugify(fields[0], separator='_', replace_dot=True)

            # a parent ID of "0" is stored as "."
            paternal_id = slugify(fields[2], separator='_', replace_dot=True)
            if paternal_id == "0":
                paternal_id = "."

            maternal_id = slugify(fields[3], separator='_', replace_dot=True)
            if maternal_id == "0":
                maternal_id = "."

            gender = 'unknown'
            if fields[4] == '2' or fields[4].upper().startswith('F'):
                gender = 'female'
            elif fields[4] == '1' or fields[4].upper().startswith('M'):
                gender = 'male'

            affected_status = 'unknown'
            if fields[5] == '2' or fields[5].upper().startswith('A'):
                affected_status = 'affected'
            elif fields[5] == '1' or fields[5].upper().startswith('U'):
                affected_status = 'unaffected'
        except Exception as e:
            raise ValueError(
                "Couldn't parse line: %(line)s. Fields: %(fields)s. exception: %(e)s"
                % locals())

        indiv = Individual(
            indiv_id,
            project_id=project_id,
            family_id=family_id,
            paternal_id=paternal_id,
            maternal_id=maternal_id,
            gender=gender,
            affected_status=affected_status,
        )
        individuals.append(indiv)

    return individuals
Esempio n. 9
0
def get_individuals_from_fam_file(fam_file, project_id='.'):
    """
    Returns a list of individuals from a FAM file

    Args:
        fam_file: iterable of text lines (e.g. an open file). A data line
            holds at least 6 tab-separated fields: family ID, individual ID,
            paternal ID, maternal ID, sex, affected status
        project_id: passed on to every Individual that is created

    Returns:
        list of Individual objects, one per data line, in input order

    Raises:
        ValueError: if a data line cannot be parsed

    Blank lines and lines starting with '#' are ignored.
    """
    individuals = []

    for line in fam_file:
        try:
            # ignore these rows; lines read from a file keep their trailing
            # newline, so test the stripped line instead of comparing with ''
            if not line.strip() or line.startswith('#'):
                continue

            fields = line.strip('\n').split('\t')

            indiv_id = slugify(fields[1], separator='_')
            family_id = slugify(fields[0], separator='_')

            # a parent ID of "0" is stored as "."
            paternal_id = slugify(fields[2], separator='_')
            if paternal_id == "0":
                paternal_id = "."

            maternal_id = slugify(fields[3], separator='_')
            if maternal_id == "0":
                maternal_id = "."

            gender = 'unknown'
            if fields[4] == '2' or fields[4] == 'F':
                gender = 'female'
            elif fields[4] == '1' or fields[4] == 'M':
                gender = 'male'

            affected_status = 'unknown'
            if fields[5] == '2':
                affected_status = 'affected'
            elif fields[5] == '1':
                affected_status = 'unaffected'
        except Exception as e:
            raise ValueError("Couldn't parse line: %(line)s exception: %(e)s" % locals())

        indiv = Individual(
            indiv_id,
            project_id=project_id,
            family_id=family_id,
            paternal_id=paternal_id,
            maternal_id=maternal_id,
            gender=gender,
            affected_status=affected_status,
        )
        individuals.append(indiv)

    return individuals
Esempio n. 10
0
def get_ids_from_vcf(vcf_file):
    """
    List the slugified sample IDs named in a VCF's #CHROM header line.

    vcf_file is iterated line by line; the first line starting with '#CHROM'
    is parsed with get_vcf_headers and every header from index 9 onwards is
    slugified and returned. Returns None when no such line is found.
    """
    for raw_line in vcf_file:
        header_line = raw_line.strip('\n')
        if not header_line.startswith('#CHROM'):
            continue
        sample_headers = get_vcf_headers(header_line)[9:]
        return [
            slugify(sample_header, separator='_', replace_dot=True)
            for sample_header in sample_headers
        ]
Esempio n. 11
0
def set_genotypes_from_vcf_fields(vcf_fields, variant, alt_allele_pos, vcf_header_fields, genotype_meta=True, indivs_to_include=None, vcf_id_map=None):
    """
    if variant is a basic variants, initialize its genotypes from vcf_fields
    vcf_header_fields is just a list of the headers in the vcf
    (with the # stripped of the #CHROM in the first column)

    indivs_to_include: optional individual IDs; when given and non-empty,
        every other sample column is skipped
    vcf_id_map: dict of [ID in the VCF file] -> [Individual ID]

    Sample columns start at index 9. Returns the same variant object, with
    its genotypes attribute replaced by a dict keyed by individual ID.
    Raises an Exception when the row and the header differ in length.
    """
    num_columns = len(vcf_fields)
    if num_columns != len(vcf_header_fields):
        raise Exception("Wrong number of columns")

    genotypes = {}
    format_str = vcf_fields[8]
    allele_position_map = get_allele_position_map(vcf_fields[3], vcf_fields[4])
    vcf_filter = vcf_fields[6].lower()

    # position of each supported key inside the colon-separated FORMAT string
    formats = {}
    for i, item in enumerate(format_str.split(':')):
        if item in ('AD', 'DP', 'GQ', 'PL'):
            formats[item.lower()] = i

    if indivs_to_include:
        # Materialized as a set: a lazy map() object (Python 3) is always
        # truthy and is exhausted by the first membership test, which would
        # silently drop later columns. The same separator as for the VCF
        # header IDs below is used so both sides of the comparison agree.
        indivs_to_include = set(
            slugify(indiv_id, separator='_') for indiv_id in indivs_to_include)

    for col_index in range(9, num_columns):

        vcf_id = slugify(vcf_header_fields[col_index], separator='_')
        if vcf_id_map:
            # fall back to the VCF ID itself when it has no mapping
            indiv_id = vcf_id_map.get(vcf_id, vcf_id)
        else:
            indiv_id = vcf_id
        if indivs_to_include and indiv_id not in indivs_to_include:
            continue
        geno_str = vcf_fields[col_index]
        try:
            if genotype_meta:
                genotypes[indiv_id] = get_genotype_from_str(geno_str, formats, alt_allele_pos, allele_position_map, vcf_filter=vcf_filter)
            else:
                raise Exception("genotypes without meta not implemented - need to add kwarg")

        except:
            # report what was being parsed, then let the error propagate
            sys.stdout.write("Could not parse genotype from string: %s with format: %s. Allele_position_map: %s" % (geno_str, format_str, allele_position_map))
            raise

    variant.genotypes = genotypes

    return variant
Esempio n. 12
0
def add_vcf_file_to_project(project, vcf_file):
    """
    Attach vcf_file to every individual of the project that appears in it.

    Individuals are matched on the slugified form of the VCF sample IDs.
    When the sample ID as written in the VCF differs from the individual's
    indiv_id, it is stored as the individual's vcf_id and the individual is
    saved.
    """
    # slugified sample ID -> sample ID as written in the VCF
    slug_to_sample_id = {}
    for sample_id in set(vcf_file.sample_id_list()):
        slug_to_sample_id[slugify(sample_id, separator='_')] = sample_id

    for individual in project.individual_set.all():
        if individual.indiv_id not in slug_to_sample_id:
            continue
        individual.vcf_files.add(vcf_file)
        if slug_to_sample_id[individual.indiv_id] == individual.indiv_id:
            continue
        individual.vcf_id = slug_to_sample_id[individual.indiv_id]
        individual.save()
Esempio n. 13
0
def add_vcf_file_to_project(project, vcf_file):
    """
    Link vcf_file to each project individual whose ID occurs in the VCF.

    The VCF sample IDs are slugified before being compared with indiv_id.
    An individual whose raw VCF sample ID is not identical to its indiv_id
    additionally gets that raw ID stored as vcf_id and is saved.
    """
    unique_sample_ids = set(vcf_file.sample_id_list())
    # slugified sample ID -> raw sample ID from the VCF
    raw_id_by_slug = dict(
        (slugify(raw_id, separator='_'), raw_id) for raw_id in unique_sample_ids)

    for individual in project.individual_set.all():
        if individual.indiv_id in raw_id_by_slug:
            individual.vcf_files.add(vcf_file)
            if not individual.indiv_id == raw_id_by_slug[individual.indiv_id]:
                individual.vcf_id = raw_id_by_slug[individual.indiv_id]
                individual.save()
Esempio n. 14
0
def get_ids_from_vcf(vcf_file):
    """
    Return the slugified individual IDs from the #CHROM line of a VCF.

    Lines are read from vcf_file until one starts with '#CHROM'; its headers
    (as returned by get_vcf_headers) from index 9 onwards are slugified and
    returned as a list. If no line starts with '#CHROM' the result is None.
    """
    for raw_line in vcf_file:
        stripped = raw_line.strip('\n')
        if stripped.startswith('#CHROM'):
            indiv_ids = []
            for header in get_vcf_headers(stripped)[9:]:
                indiv_ids.append(
                    slugify(header, separator='_', replace_dot=True))
            return indiv_ids
Esempio n. 15
0
def add_breakpoint_from_dict(project, bp):
    """
    Store the breakpoint described by the dict `bp`, plus its genes, in `project`.

    Keys read from `bp`: chr, start, sample, depth, sample_count, partner,
    cscore, genes and cdsdist. genes and cdsdist are comma-separated lists
    that are paired up by position.

    The individual named by the slugified bp['sample'] must already exist in
    the project.

    A breakpoint already stored for the same project, position and individual
    is kept exactly as it is, even if the incoming data differs; delete it
    first to reload it. Loading further samples by running the load again is
    safe.
    """
    # Fields in dict are chr     start   end     sample  depth   cscore  partner genes   cdsdist
    xpos = genomeloc.get_xpos(bp['chr'], int(bp['start']))
    sample_id = slugify(bp['sample'], separator='_')

    try:
        breakpoint = Breakpoint.objects.get(
            project=project, xpos=xpos, individual__indiv_id=sample_id)
        existing = True
    except Breakpoint.DoesNotExist:
        existing = False
        breakpoint = Breakpoint()

        breakpoint.xpos = xpos
        breakpoint.project = project
        breakpoint.obs = int(bp['depth'])
        breakpoint.individual = Individual.objects.get(
            project=project, indiv_id=sample_id)
        breakpoint.sample_count = int(bp['sample_count'])
        breakpoint.partner = bp['partner']
        breakpoint.consensus = bp['cscore']
        breakpoint.save()

    gene_symbols = bp['genes'].split(',')
    cds_distances = bp['cdsdist'].split(',')
    for gene_symbol, cds_dist in zip(gene_symbols, cds_distances):
        if not gene_symbol:
            continue

        # reuse the stored gene record of an existing breakpoint if there is one
        gene = None
        if existing:
            try:
                gene = BreakpointGene.objects.get(
                    breakpoint=breakpoint, gene_symbol=gene_symbol)
            except BreakpointGene.DoesNotExist:
                pass
        if gene is None:
            gene = BreakpointGene()

        gene.breakpoint = breakpoint
        gene.gene_symbol = gene_symbol
        gene.cds_dist = int(cds_dist)
        gene.save()
Esempio n. 16
0
def add_breakpoint_from_dict(project, bp ):
    """
    Create the breakpoint described by `bp` in `project` and save its genes.

    `bp` is a dict; the keys used are chr, start, sample, depth,
    sample_count, partner, cscore, genes and cdsdist. The comma-separated
    genes and cdsdist values are matched up position by position.

    The slugified bp['sample'] has to name an individual that is already
    loaded in the project.

    An existing breakpoint (same project, position and individual) is never
    modified, even when the data being loaded is different, so a reload
    requires deleting it first. Running the load again to add new samples is
    safe.
    """

    # Fields in dict are chr     start   end     sample  depth   cscore  partner genes   cdsdist
    xpos = genomeloc.get_xpos(bp['chr'], int(bp['start']))
    sample_id = slugify(bp['sample'], separator='_')

    lookup = dict(project=project, xpos=xpos, individual__indiv_id=sample_id)
    try:
        breakpoint = Breakpoint.objects.get(**lookup)
        existing = True
    except Breakpoint.DoesNotExist:
        existing = False
        breakpoint = Breakpoint()

        breakpoint.xpos = xpos
        breakpoint.project = project
        breakpoint.obs = int(bp['depth'])
        breakpoint.individual = Individual.objects.get(project=project,
                                                       indiv_id=sample_id)
        breakpoint.sample_count = int(bp['sample_count'])
        breakpoint.partner = bp['partner']
        breakpoint.consensus = bp['cscore']
        breakpoint.save()

    symbol_distance_pairs = zip(bp['genes'].split(','), bp['cdsdist'].split(','))
    for gene_symbol, cds_dist in symbol_distance_pairs:
        if not gene_symbol:
            continue

        # only an existing breakpoint can already own a gene record
        gene = None
        if existing:
            try:
                gene = BreakpointGene.objects.get(breakpoint=breakpoint, gene_symbol=gene_symbol)
            except BreakpointGene.DoesNotExist:
                gene = None
        if gene is None:
            gene = BreakpointGene()

        gene.breakpoint = breakpoint
        gene.gene_symbol = gene_symbol
        gene.cds_dist = int(cds_dist)
        gene.save()
Esempio n. 17
0
    def handle(self, *args, **options):
        """
        Load a project's samples, name, individuals and VCF files from a directory.

        Positional args:
            args[0]: project_id of an existing Project
            args[1]: project directory; it must contain a 'project.yaml' file

        Keys read from project.yaml:
            sample_id_list: path, relative to the project directory, of a
                file with one sample ID per line
            project_name: stored on the project
            ped_files (optional): relative paths, each passed to
                sample_management.update_project_from_fam
            vcf_files (optional): relative paths, each registered as a
                VCFFile and attached with add_vcf_file_to_project
            nicknames (optional): recognised but not handled yet
        """

        project_id = args[0]
        project = Project.objects.get(project_id=project_id)
        project_dir = os.path.abspath(args[1])
        project_yaml_file = os.path.join(project_dir, 'project.yaml')

        # NOTE(review): depending on the loader in use, yaml.load can build
        # arbitrary Python objects; if project.yaml may come from an untrusted
        # source, switch to yaml.safe_load. The call is left as it was.
        with open(project_yaml_file) as yaml_file:
            project_spec = yaml.load(yaml_file)

        # load in sample IDs that we'll use for the project
        sample_id_file = os.path.join(project_dir,
                                      project_spec['sample_id_list'])
        with open(sample_id_file) as sample_ids_in:
            sample_ids = [
                slugify(l.strip('\n'), separator='_') for l in sample_ids_in
            ]
        sample_management.add_indiv_ids_to_project(project, sample_ids)

        # set meta info
        project.project_name = project_spec['project_name']
        project.save()

        # nicknames
        if 'nicknames' in project_spec:
            # todo
            pass

        # load individuals
        if 'ped_files' in project_spec:
            for relative_path in project_spec['ped_files']:
                ped_file_path = os.path.join(project_dir, relative_path)
                # the file is closed once the call has returned
                with open(ped_file_path) as ped_file:
                    sample_management.update_project_from_fam(
                        project, ped_file)
                # todo: add awesome-slugify to above

        # set VCF files
        if 'vcf_files' in project_spec:
            for relative_path in project_spec['vcf_files']:
                vcf_file_path = os.path.join(project_dir, relative_path)
                # todo: this should be a fn somewhere that add_vcf_to_project uses too
                vcf_file = VCFFile.objects.get_or_create(
                    file_path=vcf_file_path)[0]
                sample_management.add_vcf_file_to_project(project, vcf_file)
Esempio n. 18
0
    def handle(self, *args, **options):
        """
        Populate an existing project from the files in a project directory.

        Positional args:
            args[0]: project_id of an existing Project
            args[1]: project directory; it must contain a 'project.yaml' file

        Keys read from project.yaml:
            sample_id_list: path, relative to the project directory, of a
                file with one sample ID per line
            project_name: stored on the project
            ped_files (optional): relative paths, each passed to
                sample_management.update_project_from_fam
            vcf_files (optional): relative paths, each registered as a
                VCFFile and attached with add_vcf_file_to_project
            nicknames (optional): recognised but not handled yet
        """

        project_id = args[0]
        project = Project.objects.get(project_id=project_id)
        project_dir = os.path.abspath(args[1])
        project_yaml_file = os.path.join(project_dir, 'project.yaml')

        # NOTE(review): depending on the loader in use, yaml.load can build
        # arbitrary Python objects; if project.yaml may come from an untrusted
        # source, switch to yaml.safe_load. The call is left as it was.
        with open(project_yaml_file) as spec_file:
            project_spec = yaml.load(spec_file)

        # load in sample IDs that we'll use for the project
        sample_id_file = os.path.join(project_dir, project_spec['sample_id_list'])
        with open(sample_id_file) as id_lines:
            sample_ids = [l.strip('\n') for l in id_lines]
        sample_ids = [slugify(s, separator='_') for s in sample_ids]
        sample_management.add_indiv_ids_to_project(project, sample_ids)

        # set meta info
        project.project_name = project_spec['project_name']
        project.save()

        # nicknames
        if 'nicknames' in project_spec:
            # todo
            pass

        # load individuals
        if 'ped_files' in project_spec:
            for relative_path in project_spec['ped_files']:
                ped_file_path = os.path.join(project_dir, relative_path)
                # the file is closed once the call has returned
                with open(ped_file_path) as ped_file:
                    sample_management.update_project_from_fam(project, ped_file)
                # todo: add awesome-slugify to above

        # set VCF files
        if 'vcf_files' in project_spec:
            for relative_path in project_spec['vcf_files']:
                vcf_file_path = os.path.join(project_dir, relative_path)
                # todo: this should be a fn somewhere that add_vcf_to_project uses too
                vcf_file = VCFFile.objects.get_or_create(file_path=vcf_file_path)[0]
                sample_management.add_vcf_file_to_project(project, vcf_file)
Esempio n. 19
0
    def load_project(self, project_id, json_path):
        #from collections import defaultdict
        #objects_by_pk = defaultdict(dict)
        print("------------------")
        project = Project.objects.get(project_id=project_id)
        users = {}
        families = {}
        cohorts = {}
        individuals = {}
        project_tags = {}
        project_phenotypes = {}
        gene_lists = {}
        with open(json_path) as f:
            contents = f.read()
            raw_json_data = json.loads(contents)

            #obj_generator = serializers.json.Deserializer(contents)
            # Couldn't find a way to make Deserializer return foreign key ids
            for obj in raw_json_data:
                #print("Object: " + str(obj))
                obj_pk = obj['pk']
                obj_model = obj['model']
                obj_fields = obj['fields']
                if obj_model == 'base.project':
                    project = Project.objects.get(project_id=project_id)
                    project.project_name = obj_fields['project_name']
                    project.description = obj_fields['description']
                    project.last_accessed_date = obj_fields['last_accessed_date']
                    if obj_fields['private_reference_populations']:
                        #raise ValueError("private_reference_populations not implemented: " + str(obj_fields['private_reference_populations']))
                        pass
                    if 'gene_lists' in obj_fields and obj_fields['gene_lists']:
                        raise ValueError("gene_lists not implemented: " + str(project.gene_lists.all()))

                    print("project: " + str(project))
                    project.save()
                elif obj_model == 'auth.user':
                    try:
                        user_queryset = User.objects.filter(email = obj_fields['email'])
                        assert len(user_queryset) == 1
                        users[obj_pk] = user_queryset[0]
                    except Exception, e:
                        if obj_fields['username'] == 'monkol':
                            users[obj_pk] = User.objects.get(email = '*****@*****.**')
                            continue

                        # users specific to this project
                        #if not any(n in obj_fields['username'] for n in ["username1", "username2", ...]):
                        #    continue


                        print("ERROR couldn't find user %s: %s  %s" % (obj_pk, obj_fields, str(e)))
                        if not obj_fields['email']:
                            continue
                        i = raw_input("Create this user? [y/n] ")
                        if i.lower() != "y":
                            continue
                        print("Creating user: %s" % str(obj_fields))

                        matching_users = User.objects.filter( Q(email = obj_fields['email']) | Q(username=obj_fields['username']) )
                        if matching_users:
                            assert len(matching_users) == 1
                            user = next(matching_users)
                        else:
                            user = User.objects.create(email = obj_fields['email'], username=obj_fields['username'])
                            user.is_active = bool(obj_fields['is_active'])
                            #user.is_superuser = bool(obj_fields['is_superuser'])
                            #user.is_staff = bool(obj_fields['is_staff'])
                            user.last_login = obj_fields['last_login']
                            user.groups = obj_fields['groups']
                            user.password = obj_fields['password']
                            user.date_joined = obj_fields['date_joined']
                            user.save()
                        users[obj_pk] = user

                elif obj_model == 'base.projectcollaborator':
                    collaborator, created = ProjectCollaborator.objects.get_or_create(
                        project=project,
                        user=users[obj_fields["user"]])
                    collaborator.collaborator_type = obj_fields['collaborator_type']
                    collaborator.save()
                elif obj_model == 'base.family':
                    try:
                        family = Family.objects.get(project=project, family_id=slugify(obj_fields['family_id'], separator='_'))
                    except Exception, e:
                        print("ERROR: family not found in local db: " + slugify(obj_fields['family_id'], separator='_'))
                        continue
Esempio n. 20
0
                    family_group, created = FamilyGroup.objects.get_or_create(project=project,
                        slug=obj_fields['slug'],
                        name=obj_fields['name'],
                        description=obj_fields['description'])
                    if not family_group.families.all():
                        for family_id in obj_fields['families']:
                            family_group.families.add(families[family_id])
                    print("familygroup: " + str(family_group))
                    family_group.save()
                elif obj_model == 'base.familyimageslide':
                    raise ValueError("FamilyImageSlide not implemented")
                elif obj_model == 'base.cohort':
                    cohorts[obj_pk] = obj
                    print("WARNING: Cohort not implemented. Won't deserialize: " + str(obj))
                elif obj_model == "base.individual":
                    obj_fields['indiv_id'] = slugify(obj_fields['indiv_id'], separator='_')
                    try:
                        individual = individuals[obj_pk] = Individual.objects.get(project=project, indiv_id=obj_fields['indiv_id'])
                    except:
                        print("ERROR: individual not found in local db: " + obj_fields['indiv_id'])
                        continue

                    print("individual: " + slugify(obj_fields['indiv_id'], separator='_'))
                    individual.nickname = obj_fields['nickname']
                    individual.other_notes = obj_fields['other_notes']
                    individual.save()
                elif obj_model == "base.causalvariant":
                    causal_variant, created = CausalVariant.objects.get_or_create(
                        family = families[obj_fields["family"]],
                        variant_type=obj_fields["variant_type"],
                        xpos=obj_fields["xpos"],
Esempio n. 21
0
import argparse
import os
from xbrowse.utils import slugify

if __name__ == '__main__':
    # Command-line entry point: read the tab-delimited PED file named on the
    # command line and write a normalized copy next to it, with the suffix
    # '.xbrowse.ped' appended to the original filename.

    parser = argparse.ArgumentParser(description='Convert any PED file to the xbrowse dialect')
    parser.add_argument('ped')
    args = parser.parse_args()

    filename = args.ped
    if not os.path.exists(filename):
        raise Exception('File does not exist')
    if '.' not in filename:
        raise Exception('Filename must have an extension.')
    out_filename = filename + '.xbrowse.ped'

    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if a malformed line raises part-way through the file.
    # The input is opened first so a read failure does not leave behind an
    # empty output file.
    with open(filename) as infile, open(out_filename, 'w') as outfile:
        for line in infile:
            fields = line.strip('\n').split('\t')
            # Columns 2-5: a literal '0' is rewritten as '.'.
            # NOTE(review): presumably '0' is the source dialect's "missing"
            # marker for parent/sex/phenotype columns - confirm.
            for i in [2, 3, 4, 5]:
                if fields[i] == '0':
                    fields[i] = '.'
            # Columns 0-3: slugify every value that is not the '.' placeholder,
            # using '_' as the separator.
            for i in [0, 1, 2, 3]:
                if fields[i] != '.':
                    fields[i] = slugify(fields[i], separator='_')
            outfile.write('\t'.join(fields) + '\n')
Esempio n. 22
0
                        name=obj_fields['name'],
                        description=obj_fields['description'])
                    if not family_group.families.all():
                        for family_id in obj_fields['families']:
                            family_group.families.add(families[family_id])
                    print("familygroup: " + str(family_group))
                    family_group.save()
                elif obj_model == 'base.familyimageslide':
                    raise ValueError("FamilyImageSlide not implemented")
                elif obj_model == 'base.cohort':
                    cohorts[obj_pk] = obj
                    print(
                        "WARNING: Cohort not implemented. Won't deserialize: "
                        + str(obj))
                elif obj_model == "base.individual":
                    obj_fields['indiv_id'] = slugify(obj_fields['indiv_id'],
                                                     separator='_')
                    try:
                        individual = individuals[
                            obj_pk] = Individual.objects.get(
                                project=project,
                                indiv_id=obj_fields['indiv_id'])
                    except:
                        print("ERROR: individual not found in local db: " +
                              obj_fields['indiv_id'])
                        continue

                    print("individual: " +
                          slugify(obj_fields['indiv_id'], separator='_'))
                    individual.nickname = obj_fields['nickname']
                    individual.other_notes = obj_fields['other_notes']
                    individual.save()
Esempio n. 23
0
import argparse
import os
from xbrowse.utils import slugify

if __name__ == '__main__':
    # Command-line entry point: read the tab-delimited PED file named on the
    # command line and write a normalized copy next to it, with the suffix
    # '.xbrowse.ped' appended to the original filename.

    parser = argparse.ArgumentParser(
        description='Convert any PED file to the xbrowse dialect')
    parser.add_argument('ped')
    args = parser.parse_args()

    filename = args.ped
    if not os.path.exists(filename):
        raise Exception('File does not exist')
    if '.' not in filename:
        raise Exception('Filename must have an extension.')
    out_filename = filename + '.xbrowse.ped'

    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if a malformed line raises part-way through the file.
    # The input is opened first so a read failure does not leave behind an
    # empty output file.
    with open(filename) as infile, open(out_filename, 'w') as outfile:
        for line in infile:
            fields = line.strip('\n').split('\t')
            # Columns 2-5: a literal '0' is rewritten as '.'.
            # NOTE(review): presumably '0' is the source dialect's "missing"
            # marker for parent/sex/phenotype columns - confirm.
            for i in [2, 3, 4, 5]:
                if fields[i] == '0':
                    fields[i] = '.'
            # Columns 0-3: slugify every value that is not the '.' placeholder,
            # using '_' as the separator.
            for i in [0, 1, 2, 3]:
                if fields[i] != '.':
                    fields[i] = slugify(fields[i], separator='_')
            outfile.write('\t'.join(fields) + '\n')
Esempio n. 24
0
                        description=obj_fields['description'])
                    if not family_group.families.all():
                        for family_id in obj_fields['families']:
                            if family_id in families:
                                family_group.families.add(families[family_id])
                            else:
                                print("WARNING: family not found: " + family_id)
                    print("familygroup: " + str(family_group))
                    family_group.save()
                elif obj_model == 'base.familyimageslide':
                    raise ValueError("FamilyImageSlide not implemented")
                elif obj_model == 'base.cohort':
                    cohorts[obj_pk] = obj
                    print("WARNING: Cohort not implemented. Won't deserialize: " + str(obj))
                elif obj_model == "base.individual":
                    obj_fields['indiv_id'] = slugify(obj_fields['indiv_id'], separator='_')
                    try:
                        individual = individuals[obj_pk] = Individual.objects.get(project=project, indiv_id=obj_fields['indiv_id'])
                    except:
                        print("ERROR: individual not found in local db: " + obj_fields['indiv_id'])
                        continue

                    print("individual: " + slugify(obj_fields['indiv_id'], separator='_'))
                    individual.nickname = obj_fields['nickname']
                    individual.other_notes = obj_fields['other_notes']
                    individual.save()
                elif obj_model == "base.causalvariant":
                    causal_variant, created = CausalVariant.objects.get_or_create(
                        family = families[obj_fields["family"]],
                        variant_type=obj_fields["variant_type"],
                        xpos=obj_fields["xpos"],
Esempio n. 25
0
    def handle(self, *args, **options):
        """
        Load an existing project's settings and data-file references from a
        project directory.

        Positional args:
            args[0] -- project_id of a Project that already exists
            args[1] -- path to a directory containing a 'project.yaml' file

        The spec read from project.yaml must contain 'sample_id_list' and
        'project_name'; 'ped_files', 'vcf_files' and 'breakpoint_files' are
        optional lists of paths relative to the project directory.

        Prints a message and exits with status 1 if fewer than two args are
        given or if no Project with the given id exists.
        """

        if len(args) < 2:
            print("Usage: ./manage.py load_project_dir <project_id> <project_path>")
            print("")
            sys.exit(1)

        project_id = args[0]
        try:
            project = Project.objects.get(project_id=project_id)

        except Project.DoesNotExist:
            print("\nError:")
            print("\nNo project could be found with id '%s'" % project_id)
            print("")
            print("Please use the add_project command first to add this project before loading it.")
            print("")
            sys.exit(1)

        project_dir = os.path.abspath(args[1])
        project_yaml_file = os.path.join(project_dir, 'project.yaml')

        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects; if project.yaml may come from an untrusted
        # source, switch to yaml.safe_load.
        with open(project_yaml_file) as yaml_file:
            project_spec = yaml.load(yaml_file)

        # load in sample IDs that we'll use for the project
        sample_id_file = os.path.join(project_dir, project_spec['sample_id_list'])
        with open(sample_id_file) as sample_ids_in:
            # one ID per line; slugify so IDs use '_' as the separator
            sample_ids = [slugify(l.strip('\n'), separator='_') for l in sample_ids_in]
        sample_management.add_indiv_ids_to_project(project, sample_ids)

        # set meta info
        project.project_name = project_spec['project_name']
        project.save()

        # nicknames
        if 'nicknames' in project_spec:
            # todo
            pass

        # load individuals
        if 'ped_files' in project_spec:
            for relative_path in project_spec['ped_files']:
                ped_file_path = os.path.join(project_dir, relative_path)
                # presumably update_project_from_fam consumes the file before
                # returning, so it is safe to close it right after - verify.
                with open(ped_file_path) as ped_file:
                    sample_management.update_project_from_fam(project, ped_file)
                # todo: add awesome-slugify to above

        # set VCF files
        if 'vcf_files' in project_spec:
            for relative_path in project_spec['vcf_files']:
                vcf_file_path = os.path.join(project_dir, relative_path)
                # todo: this should be a fn somewhere that add_vcf_to_project uses too
                vcf_file = VCFFile.objects.get_or_create(file_path=vcf_file_path)[0]
                sample_management.add_vcf_file_to_project(project, vcf_file)

        if 'breakpoint_files' in project_spec:
            for relative_path in project_spec['breakpoint_files']:
                # a new BreakpointFile row is created on every run (no get_or_create)
                breakpoint_file = BreakpointFile()
                breakpoint_file.project = project
                breakpoint_file.file_path = os.path.join(project_dir, relative_path)
                breakpoint_file.save()
                print("Adding breakpoint file: %s" % breakpoint_file.file_path)
Esempio n. 26
0
    def load_project(self, project_id, json_path):
        #from collections import defaultdict
        #objects_by_pk = defaultdict(dict)
        print("------------------")
        project = Project.objects.get(project_id=project_id)
        users = {}
        families = {}
        cohorts = {}
        individuals = {}
        project_tags = {}
        project_phenotypes = {}
        gene_lists = {}
        with open(json_path) as f:
            contents = f.read()
            raw_json_data = json.loads(contents)

            #obj_generator = serializers.json.Deserializer(contents)
            # Couldn't find a way to make Deserializer return foreign key ids
            for obj in raw_json_data:
                #print("Object: " + str(obj))
                obj_pk = obj['pk']
                obj_model = obj['model']
                obj_fields = obj['fields']
                if obj_model == 'base.project':
                    project = Project.objects.get(project_id=project_id)
                    project.project_name = obj_fields['project_name']
                    project.description = obj_fields['description']
                    project.last_accessed_date = obj_fields['last_accessed_date']
                    if obj_fields['private_reference_populations']:
                        #raise ValueError("private_reference_populations not implemented: " + str(obj_fields['private_reference_populations']))
                        pass
                    if 'gene_lists' in obj_fields and obj_fields['gene_lists']:
                        raise ValueError("gene_lists not implemented: " + str(project.gene_lists.all()))

                    print("project: " + str(project))
                    project.save()
                elif obj_model == 'auth.user':
                    try:
                        user_queryset = User.objects.filter(email = obj_fields['email'])
                        assert len(user_queryset) == 1
                        users[obj_pk] = user_queryset[0]
                    except Exception, e:
                        if obj_fields['username'] == 'monkol':
                            users[obj_pk] = User.objects.get(email = '*****@*****.**')
                            continue

                        # users specific to this project
                        #if not any(n in obj_fields['username'] for n in ["username1", "username2", ...]):
                        #    continue


                        print("ERROR couldn't find user %s: %s  %s" % (obj_pk, obj_fields, str(e)))
                        if not obj_fields['email']:
                            continue
                        i = raw_input("Create this user? [y/n] ")
                        if i.lower() != "y":
                            continue
                        print("Creating user: %s" % str(obj_fields))

                        matching_users = User.objects.filter( Q(email = obj_fields['email']) | Q(username=obj_fields['username']) )
                        if matching_users:
                            assert len(matching_users) == 1
                            user = next(matching_users)
                        else:
                            user = User.objects.create(email = obj_fields['email'], username=obj_fields['username'])
                            user.is_active = bool(obj_fields['is_active'])
                            #user.is_superuser = bool(obj_fields['is_superuser'])
                            #user.is_staff = bool(obj_fields['is_staff'])
                            user.last_login = obj_fields['last_login']
                            user.groups = obj_fields['groups']
                            user.password = obj_fields['password']
                            user.date_joined = obj_fields['date_joined']
                            user.save()
                        users[obj_pk] = user

                elif obj_model == 'base.projectcollaborator':
                    collaborator, created = ProjectCollaborator.objects.get_or_create(
                        project=project,
                        user=users[obj_fields["user"]])
                    collaborator.collaborator_type = obj_fields['collaborator_type']
                    collaborator.save()
                elif obj_model == 'base.family':
                    try:
                        family = Family.objects.get(project=project, family_id=slugify(obj_fields['family_id'], separator='_'))
                    except Exception, e:
                        print("ERROR: family not found in local db: " + slugify(obj_fields['family_id'], separator='_'))
                        continue