예제 #1
0
def persist_part_overhang_annotations(repo, vectorname, part, authorid, date):
    """Annotate a part's nucleotide sequence with a vector's two overhangs.

    The 5' and 3' overhang annotations for ``vectorname`` are obtained from
    ``get_nucseq_annotations``. Each overhang feature is then registered on
    ``part['nucseq']`` through ``repository.add_feature_to_nucseq`` under the
    name ``"<overhang name> in <part name>"``:

    * the 5' overhang at position 0;
    * the 3' overhang at ``len(part sequence) - len(3' overhang sequence)``,
      i.e. flush with the end of the part sequence.
    """
    # The first element returned by the lookup is not needed here.
    _, five_prime, three_prime = get_nucseq_annotations(repo, vectorname)

    five_feature = five_prime['feature']
    five_label = five_feature['name'] + " in " + part['name']
    repository.add_feature_to_nucseq(
        repo, five_label, part['nucseq'], five_feature, 0, authorid, date)

    three_feature = three_prime['feature']
    three_label = three_feature['name'] + " in " + part['name']
    part_length = len(part['nucseq']['sequence'])
    overhang_length = len(three_feature['nucseq']['sequence'])
    repository.add_feature_to_nucseq(
        repo, three_label, part['nucseq'], three_feature,
        part_length - overhang_length, authorid, date)
예제 #2
0
def persist_part_feature(repo, vectorname, part, familyname, authorid, date):
    """Create and annotate the feature lying between a part's overhangs.

    The 5' and 3' overhang annotations for ``vectorname`` are obtained from
    ``get_nucseq_annotations``. The part sequence is stripped and lower-cased,
    and the region left after removing the (stripped) 5' overhang length from
    the front and the (stripped) 3' overhang length from the back becomes a
    new feature named ``'Feature-' + part['name']`` in family ``familyname``.
    That feature is then added to ``part['nucseq']`` at the offset where the
    region starts.
    """
    # The first element returned by the lookup is not needed here.
    _, fiveprimeoverhang, threeprimeoverhang = get_nucseq_annotations(
        repo, vectorname)

    partseq = part['nucseq']['sequence'].strip().lower()
    start = len(fiveprimeoverhang['feature']['nucseq']['sequence'].strip())
    end = len(partseq) - len(
        threeprimeoverhang['feature']['nucseq']['sequence'].strip())
    partfeatureseq = partseq[start:end]

    featurename = 'Feature-' + part['name']
    partfeature = repository.create_feature(repo, featurename, partfeatureseq,
                                            familyname, date)

    # Use the same (stripped) offset that was used to slice the feature out of
    # the part, so the recorded position always matches the feature sequence
    # even when the stored overhang sequence carries surrounding whitespace.
    repository.add_feature_to_nucseq(
        repo, featurename, part['nucseq'], partfeature, start, authorid, date)
예제 #3
0
def create_moclo_constituent_part_features(repo, pt, constituentparts, authorid, datecreated):
    '''
    Adds the features of the constituent parts to the composite part in repo.

    Walks ``constituentparts`` in order and, for each one, locates its
    features in the whitespace-stripped composite sequence
    ``pt['nucseq']['sequence']`` and registers each of them on
    ``pt['nucseq']`` via ``repository.add_feature_to_nucseq``:

    * the 5' overhang of the first part only, which must start at position 0;
    * every non-overhang annotation of the part, searched for starting at the
      end of the previously placed feature;
    * the 3' overhang of the part, which must start exactly where the
      previously placed feature ended.

    Raises:
        ValueError: if a feature cannot be found in the composite part, or if
            an overhang is found but not at the expected position.
    '''

    ptseq = pt['nucseq']['sequence'].strip()
    currfeaturestart = 0

    for partindex, cp in enumerate(constituentparts):

        # For the first part, find the fiveprimeoverhang, add it to the repo.
        # The position in the list is tested (not equality with the first
        # element) so a part repeated later on is not mistaken for the first.
        if partindex == 0:
            fiveprimeoverhang = repository.get_moclo_overhang_annotation(repo, cp['nucseq'], 'FIVE_PRIME')
            overhangseq = fiveprimeoverhang['feature']['nucseq']['sequence'].strip()

            # str.find returns -1 when the substring is missing (str.index
            # would raise its own generic error before the check below).
            featurestart = ptseq.find(overhangseq, currfeaturestart)

            if featurestart < 0:
                raise ValueError('Unable to find overhang annotation in the composite part.')
            if featurestart != currfeaturestart:
                raise ValueError("5' overhang found in composite part does not follow "
                                 "Moclo overhang rule: " + str(featurestart) + ' ' + str(currfeaturestart))
            repository.add_feature_to_nucseq(repo, pt['name'], pt['nucseq'],
                                             fiveprimeoverhang['feature'], featurestart, authorid, datecreated)
            currfeaturestart = featurestart + len(overhangseq)

        # For each non-overhang annotation, find the start position in the sequence, and add it to the repo
        for n in get_nonoverhang_annotations(repo, cp['nucseq']):
            featureseq = n['feature']['nucseq']['sequence'].strip()
            featurestart = ptseq.find(featureseq, currfeaturestart)

            if featurestart < 0:
                raise ValueError('Could not find ' + n['feature']['name'] + ' in ' + pt['name'] + '.')
            currfeaturestart = featurestart + len(featureseq)
            repository.add_feature_to_nucseq(repo, pt['name'], pt['nucseq'],
                                             n['feature'], featurestart, authorid, datecreated)

        # Get the threeprimeoverhang anno, find where it starts, add it to the repo
        threeprimeoverhang = repository.get_moclo_overhang_annotation(repo, cp['nucseq'], 'THREE_PRIME')
        overhangseq = threeprimeoverhang['feature']['nucseq']['sequence'].strip()
        featurestart = ptseq.find(overhangseq, currfeaturestart)

        if featurestart < 0:
            raise ValueError('Unable to find overhang annotation in the composite part.')
        if featurestart != currfeaturestart:
            raise ValueError("3' overhang found in composite part that does not follow Moclo rules: "
                             + str(featurestart) + ' ' + str(currfeaturestart))
        repository.add_feature_to_nucseq(repo, pt['name'], pt['nucseq'], threeprimeoverhang['feature'],
                                         featurestart, authorid, datecreated)

        # Advance past the overhang that was just placed (its own length, not
        # the 5' overhang's), so the next part is searched from its end.
        currfeaturestart = featurestart + len(overhangseq)
예제 #4
0
def process_vectors(repo, project, vectorsfiles, directories, instanceid,
                    authorid, date):
    """Load vectors described in CSV manifests into ``repo``.

    For every file in ``vectorsfiles`` the first line is checked with
    ``validate_input_file``; files with no further lines are skipped.
    Otherwise a new collection is appended to ``repo['collections']`` and
    attached to ``project['idcollection']``, and each remaining line is read
    as a comma-separated record:

        vector file name, vector name, resistance name,
        5' overhang name, 3' overhang name[, description]

    Empty fields are dropped before the columns are read, so an empty field
    shifts the following ones to the left.

    For each record the vector file is looked up in ``directories`` (entries
    that are not directories are ignored) and read with
    ``read_genbank_file``. A vector and its nucleotide sequence are appended
    to ``repo['vectors']`` / ``repo['nucseq']``, the two named overhang
    features are annotated on the sequence, and the named resistance feature
    is annotated too. When no resistance feature with that name exists, one
    is created from the sequence following the 3' overhang and added to the
    collection. Finally the vector itself is added to the collection.

    Raises:
        ValueError: if a record has fewer than five non-empty fields, the
            vector file is in none of the directories, an overhang name is
            not among the 'overhang' features, or the 3' overhang cannot be
            located when deriving the resistance feature.
    """
    # Loop-invariant: keep only real directories, once, instead of per record.
    directories = [d for d in directories if os.path.isdir(d)]

    for vectorsfile in vectorsfiles:
        with open(vectorsfile) as file:
            lines = file.readlines()
        # NOTE(review): a completely empty file raises IndexError here, as it
        # did before; confirm whether that should be reported differently.
        validate_input_file(lines[0], vectorsfile)

        if len(lines) <= 1:
            continue

        vectors = {}
        vectors['authorid'] = authorid
        vectors['datecreated'] = date
        vectors['description'] = 'Vectors described in ' + instanceid
        collectionid = uuid.uuid4()
        vectors['idcollection'] = collectionid
        repo['collections'].append(vectors)
        repository.add_object_to_collection(repo, project['idcollection'],
                                            vectors['idcollection'],
                                            'COLLECTION', authorid, date)

        # The header is line 1 of the file, so data records start at line 2.
        for lineno, line in enumerate(lines[1:], start=2):
            if not line.strip():
                # Tolerate blank lines rather than reporting them as malformed.
                continue

            tokens = line.split(',')
            tokens = [t.strip() for t in tokens if len(t.strip()) > 0]
            if len(tokens) < 5:
                raise ValueError(
                    'The Values.csv file does not have the required number of tokens on line '
                    + str(lineno))

            vectorfilename = tokens[0]
            vectorname = 'Vector-' + tokens[1]
            resistancename = 'Resistance-' + tokens[2]
            fiveprimeoverhangname = 'Overhang-' + tokens[3]
            threeprimeoverhangname = 'Overhang-' + tokens[4]

            if len(tokens) > 5:
                description = tokens[5]
            else:
                description = 'From ' + vectorfilename + ': ' + \
                              fiveprimeoverhangname + ', ' + \
                              threeprimeoverhangname + ', ' + resistancename

            # Read the vector from the first directory that contains it. The
            # else branch runs only when no directory did, so a missing file
            # can neither leave the sequence unbound nor silently reuse the
            # sequence of the previous record.
            for directory in directories:
                if vectorfilename in os.listdir(directory):
                    vectorsequence = read_genbank_file(directory + '/' +
                                                       vectorfilename)
                    break
            else:
                raise ValueError('The vector file ' + vectorfilename +
                                 ' could not be found in any of the given '
                                 'directories.')

            vector = {}
            vector['authorid'] = authorid
            vector['datecreated'] = date
            vector['description'] = description
            vector['name'] = vectorname
            vectorid = uuid.uuid4()
            vector['idvector'] = vectorid

            # The nucleotide sequence shares its id with the vector.
            nucseq = {}
            nucseq['datecreated'] = date
            nucseq['idnucseq'] = vectorid
            nucseq['sequence'] = vectorsequence

            vector['nucseq'] = nucseq
            vector['iscircular'] = True

            repo['nucseq'].append(nucseq)
            repo['vectors'].append(vector)

            # ft2 remembers the 3' overhang feature; it is needed below when
            # the resistance feature has to be derived from the sequence.
            ft2 = {}
            foundfeature1 = False
            foundfeature2 = False

            overhangfeatures = repository.get_features_by_family_name(
                repo, 'overhang')

            # Names are matched case-insensitively; only the first match for
            # each overhang is annotated.
            for feature in overhangfeatures:

                if not foundfeature1 and feature['name'].upper(
                ) == fiveprimeoverhangname.upper():
                    position = repository.get_overhang_position_in_vector(
                        vectorsequence, feature['nucseq']['sequence'])
                    repository.add_feature_to_nucseq(repo, vectorname, nucseq,
                                                     feature, position,
                                                     authorid, date)
                    foundfeature1 = True

                if not foundfeature2 and feature['name'].upper(
                ) == threeprimeoverhangname.upper():
                    position = repository.get_overhang_position_in_vector(
                        vectorsequence, feature['nucseq']['sequence'])
                    repository.add_feature_to_nucseq(repo, vectorname, nucseq,
                                                     feature, position,
                                                     authorid, date)
                    ft2 = feature
                    foundfeature2 = True

            if not foundfeature1 or not foundfeature2:
                raise ValueError(
                    'The overhangs caused by vector ' + vectorname +
                    ' were not defined in the overhangs manifest.')

            foundfeature = False

            for feature in repository.get_features_by_family_name(
                    repo, 'resistance'):
                if feature['name'].upper() == resistancename.upper():
                    position = nucseq['sequence'].find(
                        feature['nucseq']['sequence'])
                    repository.add_feature_to_nucseq(repo, vectorname, nucseq,
                                                     feature, position,
                                                     authorid, date)
                    foundfeature = True

            if not foundfeature:
                # No known resistance feature has this name: create one from
                # the part of the vector sequence after the 3' overhang.
                overhangpos = repository.get_overhang_position_in_vector(
                    vectorsequence, ft2['nucseq']['sequence'])
                if overhangpos < 0:
                    raise ValueError('The overhang ' + ft2['name'] +
                                     ' could not be found in the vector ' +
                                     vectorname)
                startpos = overhangpos + len(ft2['nucseq']['sequence']) + 1
                resistancesequence = nucseq['sequence'][
                    startpos:len(nucseq['sequence'])]
                f = repository.create_feature(repo, resistancename,
                                              resistancesequence, 'resistance',
                                              date)
                position = nucseq['sequence'].find(f['nucseq']['sequence'])
                repository.add_feature_to_nucseq(repo, vectorname, nucseq, f,
                                                 position, authorid, date)
                repository.add_object_to_collection(repo, collectionid,
                                                    f['idfeature'], 'FEATURE',
                                                    authorid, date)

            repository.add_object_to_collection(repo, collectionid, vectorid,
                                                'VECTOR', authorid, date)