예제 #1
0
    def _build_new_sequence(self, sequence: ReceptorSequence, position, signal: dict) -> ReceptorSequence:
        """Return a new sequence with the signal's motif instance written in at ``position``.

        ``signal["motif_instance"].instance`` is either a plain string or two
        halves separated by ``"/"``; ``signal["motif_instance"].gap`` is the
        number of original characters preserved between the two halves.

        :param sequence: source sequence; its string is read and its metadata
            is deep-copied, the object itself is not modified here
        :param position: index in the sequence string where the motif starts
        :param signal: dict providing ``"motif_instance"``, ``"signal_id"``
            and ``"motif_id"``
        :return: a new ``ReceptorSequence`` carrying one implant annotation
        """
        motif_instance = signal["motif_instance"]
        gap_length = motif_instance.gap
        instance = motif_instance.instance

        if "/" in instance:
            motif_left, motif_right = instance.split("/")
        else:
            motif_left, motif_right = instance, ""

        original = sequence.get_sequence()
        gap_start = position + len(motif_left)
        gap_end = gap_start + gap_length

        # prefix | left half | untouched gap characters | right half | suffix
        new_sequence_string = "".join((
            original[:position],
            motif_left,
            original[gap_start:gap_end],
            motif_right,
            original[gap_end + len(motif_right):],
        ))

        annotation = SequenceAnnotation()
        annotation.add_implant(
            ImplantAnnotation(signal_id=signal["signal_id"],
                              motif_id=signal["motif_id"],
                              motif_instance=motif_instance,
                              position=position))

        new_sequence = ReceptorSequence()
        new_sequence.set_annotation(annotation)
        new_sequence.set_metadata(copy.deepcopy(sequence.metadata))
        new_sequence.set_sequence(new_sequence_string, EnvironmentSettings.get_sequence_type())

        return new_sequence
예제 #2
0
    def _create_new_sequences(self, sequences, new_sequence_count,
                              signal) -> List[ReceptorSequence]:
        new_sequences = sequences[:-new_sequence_count]

        for _ in range(new_sequence_count):

            motif = random.choice(signal.motifs)
            motif_instance = motif.instantiate_motif()
            annotation = SequenceAnnotation([
                ImplantAnnotation(signal_id=signal.id,
                                  motif_id=motif.identifier,
                                  motif_instance=motif_instance.instance,
                                  position=0)
            ])
            metadata = SequenceMetadata(v_gene="TRBV6-1",
                                        j_gene="TRBJ2-7",
                                        count=1,
                                        chain="B")

            new_sequences.append(
                ReceptorSequence(amino_acid_sequence=motif_instance.instance,
                                 annotation=annotation,
                                 metadata=metadata))

        return new_sequences
예제 #3
0
    def _make_sequence_object(self, row, load_implants: bool = False):
        """Build a ``ReceptorSequence`` from one row of a structured array.

        Columns missing from ``row.dtype.names`` yield ``None`` for the
        corresponding attribute, except ``frame_types`` which falls back to
        ``"IN"``. ``counts`` is additionally set to ``None`` when
        ``NumpyHelper.is_nan_or_empty`` reports the stored value as empty.

        :param row: record exposing ``dtype.names`` and access by field name
        :param load_implants: when true, every non-empty column that is not in
            ``Repertoire.FIELDS`` is parsed with ``ast.literal_eval`` and
            turned into an ``ImplantAnnotation``; columns that fail to parse
            are skipped
        :return: the assembled ``ReceptorSequence``
        """
        fields = row.dtype.names

        def column(name, default=None):
            # Value stored under `name`, or `default` when the row has no such column.
            return row[name] if name in fields else default

        implants = []
        if load_implants:
            for key in row.dtype.names:
                if key in Repertoire.FIELDS:
                    continue
                value_dict = row[key]
                if not value_dict:
                    continue
                try:
                    implants.append(
                        ImplantAnnotation(**ast.literal_eval(value_dict)))
                except (SyntaxError, ValueError, TypeError):
                    # Best effort: a column that does not hold implant kwargs is ignored.
                    continue

        if "counts" in fields and not NumpyHelper.is_nan_or_empty(row['counts']):
            count = row["counts"]
        else:
            count = None

        metadata = SequenceMetadata(
            v_gene=column("v_genes"),
            j_gene=column("j_genes"),
            v_subgroup=column("v_subgroups"),
            j_subgroup=column("j_subgroups"),
            v_allele=column("v_alleles"),
            j_allele=column("j_alleles"),
            chain=column("chains"),
            count=count,
            region_type=column("region_types"),
            frame_type=column("frame_types", "IN"),
            cell_id=column("cell_ids"),
            custom_params={
                key: column(key)
                for key in set(self.fields) - set(Repertoire.FIELDS)
            })

        return ReceptorSequence(
            amino_acid_sequence=column("sequence_aas"),
            nucleotide_sequence=column("sequences"),
            identifier=column("sequence_identifiers"),
            metadata=metadata,
            annotation=SequenceAnnotation(implants=implants))
예제 #4
0
 def create_from_record(cls, record: np.void):
     """Rebuild a ``ReceptorSequence`` from a stored record.

     String-typed entries of ``ReceptorSequence.FIELDS`` (except ``version``)
     are copied from the record as-is; ``metadata`` and ``annotation`` are
     decoded from JSON and passed as keyword arguments to
     ``SequenceMetadata`` and ``SequenceAnnotation``.

     :param record: record exposing ``dtype.names`` and access by field name
     :return: the reconstructed ``ReceptorSequence``
     :raises NotImplementedError: if the record has no ``version`` field or
         its version differs from ``cls.version``
     """
     is_supported = 'version' in record.dtype.names and record['version'] == cls.version
     if not is_supported:
         raise NotImplementedError

     kwargs = {
         key: record[key]
         for key, val_type in ReceptorSequence.FIELDS.items()
         if val_type == str and key != 'version'
     }
     kwargs['metadata'] = SequenceMetadata(**json.loads(record['metadata']))
     kwargs['annotation'] = SequenceAnnotation(**json.loads(record['annotation']))

     return ReceptorSequence(**kwargs)
예제 #5
0
    def _make_sequence_object(self, row):
        """Build a ``ReceptorSequence`` from one row of a structured array.

        Every non-empty column whose name contains ``"signal"`` is parsed with
        ``ast.literal_eval`` and used as keyword arguments for an
        ``ImplantAnnotation``. Columns missing from ``row.dtype.names`` yield
        ``None`` for the corresponding attribute, except ``frame_types`` which
        falls back to ``"IN"``.

        :param row: record exposing ``dtype.names`` and access by field name
        :return: the assembled ``ReceptorSequence``
        """
        fields = row.dtype.names

        implants = [
            ImplantAnnotation(**ast.literal_eval(row[key]))
            for key in fields
            if "signal" in key and row[key]
        ]

        # SequenceMetadata argument -> column holding its value.
        metadata_columns = {
            "v_gene": "v_genes",
            "j_gene": "j_genes",
            "v_subgroup": "v_subgroups",
            "j_subgroup": "j_subgroups",
            "v_allele": "v_alleles",
            "j_allele": "j_alleles",
            "chain": "chains",
            "count": "counts",
            "region_type": "region_types",
            "cell_id": "cell_ids",
        }
        metadata_kwargs = {
            argument: row[column] if column in fields else None
            for argument, column in metadata_columns.items()
        }
        # frame_type is the only attribute with a non-None fallback.
        metadata_kwargs["frame_type"] = row["frame_types"] if "frame_types" in fields else "IN"
        metadata_kwargs["custom_params"] = {
            key: row[key] if key in fields else None
            for key in set(self.fields) - set(Repertoire.FIELDS)
        }

        sequence_kwargs = {
            "amino_acid_sequence": "sequence_aas",
            "nucleotide_sequence": "sequences",
            "identifier": "sequence_identifiers",
        }
        return ReceptorSequence(
            metadata=SequenceMetadata(**metadata_kwargs),
            annotation=SequenceAnnotation(implants=implants),
            **{
                argument: row[column] if column in fields else None
                for argument, column in sequence_kwargs.items()
            })