Beispiel #1
0
def importPrimerPairs(fastafile):
    """Load primers from a FASTA file and pair them by name.

    Primers are created with ``MultiFasta(fastafile).createPrimers`` using
    the bowtie index found in the module-level ``config`` and
    ``calcProperties()`` is called on each of them. Every primer name is
    split at its last underscore into a base name and an orientation suffix.
    Two primers sharing a base name form a pair when one suffix is a left
    marker (``F``, ``f``, ``L``, ``l``, ``5``, ``left``) and the other is a
    right marker (``R``, ``r``, ``3``, ``right``).

    Args:
        fastafile: handed unchanged to ``MultiFasta``.

    Returns:
        A list of ``[left_primer, right_primer]`` lists, in discovery order.

    Raises:
        ValueError: two primers share a base name but their suffixes are not
            one left marker plus one right marker.
    """
    primerfile = MultiFasta(fastafile)
    primers = primerfile.createPrimers(
        config['targeting']['bowtieindex'])  # places in genome
    for primer in primers:
        primer.calcProperties()  # calc Tm and GC

    left_suffix = ('F', 'f', 'L', 'l', '5', 'left')
    rite_suffix = ('R', 'r', '3', 'right')

    # Split each name once instead of re-splitting it inside the pair loop.
    # rpartition yields ('', '', name) when there is no underscore, which
    # matches the former split/join result (empty base, whole name as suffix).
    parsed = []
    for primer in primers:
        base, _, suffix = primer.name.rpartition('_')
        parsed.append((base, suffix))

    # find pairs
    pairs = []
    for i in range(len(primers)):
        baseI, suffixI = parsed[i]
        # start at i + 1: a primer is never paired with itself and every
        # unordered combination is visited exactly once
        for j in range(i + 1, len(primers)):
            baseJ, suffixJ = parsed[j]
            if baseI != baseJ:
                continue
            if suffixI in left_suffix and suffixJ in rite_suffix:
                pairs.append([primers[i], primers[j]])
            elif suffixJ in left_suffix and suffixI in rite_suffix:
                pairs.append([primers[j], primers[i]])
            else:
                raise ValueError(
                    'cannot pair primers %r and %r: expected one suffix '
                    'from %r and one from %r'
                    % (primers[i].name, primers[j].name,
                       left_suffix, rite_suffix))
    # return valid pairs
    return pairs
Beispiel #2
0
def importPrimerPairs(fastafile):
    """Read primers from a FASTA file and return them as left/right pairs.

    The primers come from ``MultiFasta(fastafile).createPrimers`` (bowtie
    index taken from the module-level ``config``); ``calcProperties()`` is
    invoked on each one. A primer name is divided at its last underscore
    into a base name and a suffix. Primers with equal base names are paired
    when one suffix is in ``F/f/L/l/5/left`` and the other in
    ``R/r/3/right``.

    Args:
        fastafile: forwarded as-is to ``MultiFasta``.

    Returns:
        List of ``[left_primer, right_primer]`` lists.

    Raises:
        ValueError: two primers have the same base name but do not carry one
            left and one right suffix.
    """
    primerfile = MultiFasta(fastafile)
    primers = primerfile.createPrimers(config['targeting']['bowtieindex'])  # places in genome
    for primer in primers:
        primer.calcProperties()  # calc Tm and GC

    left_suffix = ('F', 'f', 'L', 'l', '5', 'left')
    rite_suffix = ('R', 'r', '3', 'right')

    # Parse all names up front; rpartition gives ('', '', name) for names
    # without an underscore, the same base/suffix the old split/join produced.
    name_parts = []
    for primer in primers:
        base, _, suffix = primer.name.rpartition('_')
        name_parts.append((base, suffix))

    # find pairs
    pairs = []
    count = len(primers)
    for i in range(count):
        base_i, suffix_i = name_parts[i]
        # j starts after i so no primer is compared with itself
        for j in range(i + 1, count):
            base_j, suffix_j = name_parts[j]
            if base_i != base_j:
                continue
            if suffix_i in left_suffix and suffix_j in rite_suffix:
                pairs.append([primers[i], primers[j]])
            elif suffix_j in left_suffix and suffix_i in rite_suffix:
                pairs.append([primers[j], primers[i]])
            else:
                raise ValueError(
                    'cannot pair primers %r and %r: expected one suffix from %r and one from %r'
                    % (primers[i].name, primers[j].name, left_suffix, rite_suffix))
    # return valid pairs
    return pairs
Beispiel #3
0
def importPrimerPairs(inputfile, config, primer3=True):
    """Import primers from a table or FASTA file and return primer pairs.

    If the text after the last ``.`` in ``inputfile`` starts with ``fa`` the
    file is handed to ``MultiFasta`` directly and every reference gets the
    default tag ``config['import']['tag']``. Otherwise the file is read as a
    tab-separated table whose header must contain ``primername``,
    ``primerset``, ``tag``, ``sequence``, ``vessel`` and ``well``; a known
    tag prefix is removed from each sequence and the primers are written to
    ``import_<first 8 chars of fileMD5(inputfile)>.fasta`` (relative path).

    The primers are then placed via ``createPrimers`` and grouped into
    ``PrimerPair`` objects, by the ``primerset`` column when present and by
    ``parsePrimerName`` otherwise. Each primer is deep-copied into its
    pair(s). Incomplete pairs are dropped with a warning.

    Args:
        inputfile: path of the table or FASTA file.
        config: nested mapping; reads ``ordersheet.sequencetags``,
            ``import.tag``, ``import.ampliconsize``, ``design.bowtieindex``,
            ``design.mispriming.minimaltm``,
            ``design.mispriming.identity3prime`` and ``design.genome``.
        primer3: when true, every pair must already have target positions
            and ``pruneRanks()`` is called on it. When false, target
            positions of unplaced pairs are derived from their amplicons.

    Returns:
        list of ``PrimerPair`` objects that have target positions.

    Raises:
        Exception: ``'FileHeaderError'`` if table columns are missing,
            ``'ImportFormattingError'`` if a primer name is repeated with a
            different sequence or tag.
        AssertionError: a primer set receives a third primer, or
            ``primer3`` is true and a pair has an unplaced primer.
    """

    def log(*parts):
        # One space-joined line on stderr: same text the former
        # ``print >> sys.stderr, a, b`` statements produced, but also valid
        # on Python 3.
        sys.stderr.write(' '.join('%s' % (part,) for part in parts) + '\n')

    # read table/fasta
    primersets = defaultdict(list)  # pair primersets
    primertags = {}  # primer tags from table
    if not inputfile.split('.')[-1].startswith('fa'):  # ignores duplicate sequence
        primerseqs = {}
        fastafile = 'import_' + fileMD5(inputfile)[:8] + '.fasta'
        minimalHeader = set(['primername', 'primerset', 'tag', 'sequence',
                             'vessel', 'well'])
        header = []
        with open(fastafile, 'w') as outfh:
            with open(inputfile) as infh:
                for i, line in enumerate(infh):
                    if i == 0:
                        # a real list: it is reused for every data row
                        header = [x.lower() for x in line.rstrip().split('\t')]
                        # explicit check instead of assert, which -O removes
                        missing = minimalHeader.difference(header)
                        if missing:
                            log('ERROR: Missing columns (%s)' % ','.join(
                                list(missing)))
                            raise Exception('FileHeaderError')
                        continue
                    if not line.strip():
                        continue  # blank row carries no primer
                    fields = [x.strip('"') for x in line.rstrip().split('\t')]
                    row = dict(zip(header, fields))
                    # remove tag from sequence
                    if row['tag']:
                        try:
                            tagseqs = config['ordersheet']['sequencetags'][
                                row['tag']]['tags']
                        except (LookupError, TypeError):
                            # tag not configured: keep sequence unchanged
                            tagseqs = ()
                        for t in tagseqs:
                            if row['sequence'].startswith(t):
                                row['sequence'] = row['sequence'][len(t):]
                                break
                    # store metadata and write fasta
                    name = row['primername']
                    if name in primerseqs:
                        # repeated primer must agree with its first occurrence
                        if (row['sequence'] != primerseqs[name]
                                or row['tag'] != primertags[name]):
                            log(name)
                            log(primerseqs[name])
                            log(primertags[name])
                            raise Exception('ImportFormattingError')
                    else:
                        outfh.write('>' + name + '\n')
                        outfh.write(row['sequence'] + '\n')
                    if row['primerset']:
                        primersets[name].append(row['primerset'])
                    primertags[name] = row['tag']
                    primerseqs[name] = row['sequence']
        primerfile = MultiFasta(fastafile)
    else:
        primerfile = MultiFasta(inputfile)
        # set default tags for import
        for r in primerfile.references:
            primertags[r] = config['import']['tag']
    log("Placing primers on genome...")
    # Align primers to genome
    primers = primerfile.createPrimers(
        config['design']['bowtieindex'],
        delete=False,
        tags=primertags,
        tmThreshold=config['design']['mispriming']['minimaltm'],
        endMatch=config['design']['mispriming']['identity3prime'])  # places in genome
    # pair primers (by name or by primerset) MAKE COPIES!!!!
    pairs = {}
    for p in primers:
        # .get() does not create an entry in the defaultdict
        setnames = primersets.get(p.name) or [parsePrimerName(p.name)[0]]
        for setname in setnames:
            if setname not in pairs:
                try:
                    pairs[setname] = PrimerPair([None, None], name=setname)
                except Exception:
                    log('>>', primersets[p.name], '|', p.name, '|', setnames, '<')
                    raise
            pair = pairs[setname]
            # get primer orientation (might be wrong if guessed from name, will correct after)
            ## this basically just makes sure primers get paired (one fwd, one reverse)
            reverse = p.targetposition.reverse if p.targetposition else parsePrimerName(
                p.name)[1] < 0
            try:
                if reverse and pair[1] is None:
                    pair[1] = deepcopy(p)
                elif pair[0] is None:
                    pair[0] = deepcopy(p)
                elif pair[1] is None:
                    pair[1] = deepcopy(p)
                else:
                    # explicit raise: an assert would vanish under -O and the
                    # second primer would be silently overwritten
                    raise AssertionError(
                        'primer set %s already has two primers' % (setname,))
            except Exception:
                log("ERROR: Primer pair strand conflict?")
                log("PRIMER0", pair[0])
                log("PRIMER1", pair[1])
                log("REVERSE", reverse)
                log("SETNAME", setname)
                log("PRIMER", p.name, parsePrimerName(p.name))
                log("PAIRS", pair)
                raise
    # check if any unpaired primers (snapshot: entries are deleted in the loop)
    for k, v in list(pairs.items()):
        if not all(v):
            log("WARNING: primer set %s is incomplete and skipped" % k)
            del pairs[k]
    # prune ranks in primer3 mode (renames pair)
    if primer3:
        for p in pairs.values():
            # make sure target positions are set
            if not (p[0].targetposition and p[1].targetposition):
                raise AssertionError(
                    'primer pair %s has an unplaced primer' % (p.name,))
            p.pruneRanks()
        return list(pairs.values())
    # guess target if not set
    validPairs = []
    log('Identifying correct amplicons for unplaced primer pairs...')
    for p in pairs.values():
        if p[0].targetposition and p[1].targetposition:
            validPairs.append(p)
            continue
        amplicons = p.amplicons(config['import']['ampliconsize'],
                                autoreverse=True)
        if amplicons:
            # first shortest amplicon, as a stable sort by length would give
            shortest = min(amplicons, key=lambda x: len(x[2]))
            if len(amplicons) > 1:
                log('WARNING: multiple amplicons for {}. Assuming shortest ({}bp) is correct.'.format(
                    p.name, str(len(shortest[2]))))
            p[0].targetposition = shortest[0]  # m
            p[1].targetposition = shortest[1]  # n
            validPairs.append(p)
            continue
        # try to find amplicon by sequence matching if no amplicons from genome mapping with sufficient Tm
        # (the former ``elif not primer3`` was always true here; its ``else``
        # branch could never run and has been removed)
        refGenome = Genome(config['design']['genome'])
        # get new loci (one round)
        newLoci = [[], []]
        for mapped, query in [[0, 1], [1, 0]]:
            for locus in p[mapped].loci:
                newLoci[query] += refGenome.primerMatch(
                    locus, p[query].seq, config['import']['ampliconsize'])
        if not newLoci[0] and not newLoci[1]:
            log('WARNING: {} is not specific and not imported ({},{})'.format(
                p.name, len(p[0].loci), len(p[1].loci)))
            continue
        # add new loci
        for i, loc in enumerate(newLoci):
            p[i].loci += loc
            p[i].loci = list(set(p[i].loci))  # remove redundancy
        # store new amplicon
        amplicons = p.amplicons(config['import']['ampliconsize'],
                                autoreverse=True)
        if amplicons:
            p[0].targetposition = amplicons[0][0]  # m
            p[1].targetposition = amplicons[0][1]  # n
            validPairs.append(p)
        else:
            log('\n'.join(["-> " + str(locus) for locus in p[0].loci]))
            log('\n'.join(["<- " + str(locus) for locus in p[1].loci]))
            log('WARNING: Primer set {} has no valid amplicons ({},{})'.format(
                p.name, len(p[0].loci), len(p[1].loci)))
    return validPairs
Beispiel #4
0
def importPrimerPairs(inputfile, config, primer3=True):
    """Import primers from a table or FASTA file and return primer pairs.

    A file whose extension (text after the last ``.``) starts with ``fa`` is
    passed straight to ``MultiFasta`` and each reference is tagged with
    ``config["import"]["tag"]``. Any other file is parsed as a tab-separated
    table with the header columns ``primername``, ``primerset``, ``tag``,
    ``sequence``, ``vessel`` and ``well``; a configured tag prefix is cut
    from each sequence and the primers are written to
    ``import_<first 8 chars of fileMD5(inputfile)>.fasta`` (relative path).

    After ``createPrimers`` has placed the primers they are grouped into
    ``PrimerPair`` objects, using the ``primerset`` column when it is filled
    and ``parsePrimerName`` otherwise. Primers are deep-copied into each pair
    they belong to. Pairs missing a primer are dropped with a warning.

    Args:
        inputfile: path of the table or FASTA file.
        config: nested mapping; reads ``ordersheet.sequencetags``,
            ``import.tag``, ``import.ampliconsize``, ``design.bowtieindex``,
            ``design.mispriming.minimaltm``,
            ``design.mispriming.identity3prime`` and ``design.genome``.
        primer3: if true, all pairs must already carry target positions and
            ``pruneRanks()`` is called on each. If false, target positions
            of unplaced pairs are taken from their amplicons.

    Returns:
        list of ``PrimerPair`` objects that have target positions.

    Raises:
        Exception: ``"FileHeaderError"`` when table columns are missing,
            ``"ImportFormattingError"`` when a primer name reappears with a
            different sequence or tag.
        AssertionError: a primer set is given a third primer, or ``primer3``
            is true and a pair has an unplaced primer.
    """

    def log(*parts):
        # Writes one space-joined line to stderr, the same text the old
        # ``print >> sys.stderr, a, b`` statements emitted, and works on
        # Python 3 as well.
        sys.stderr.write(" ".join("%s" % (part,) for part in parts) + "\n")

    # read table/fasta
    primersets = defaultdict(list)  # pair primersets
    primertags = {}  # primer tags from table
    if not inputfile.split(".")[-1].startswith("fa"):  # ignores duplicate sequence
        primerseqs = {}
        fastafile = "import_" + fileMD5(inputfile)[:8] + ".fasta"
        minimalHeader = set(["primername", "primerset", "tag", "sequence", "vessel", "well"])
        header = []
        with open(fastafile, "w") as outfh:
            with open(inputfile) as infh:
                for i, line in enumerate(infh):
                    if i == 0:
                        # materialised list: reused for every data row
                        header = [x.lower() for x in line.rstrip().split("\t")]
                        # explicit check; an assert is removed by -O
                        missing = minimalHeader.difference(header)
                        if missing:
                            log("ERROR: Missing columns (%s)" % ",".join(list(missing)))
                            raise Exception("FileHeaderError")
                        continue
                    if not line.strip():
                        continue  # blank row carries no primer
                    fields = [x.strip('"') for x in line.rstrip().split("\t")]
                    row = dict(zip(header, fields))
                    # remove tag from sequence
                    if row["tag"]:
                        try:
                            tagseqs = config["ordersheet"]["sequencetags"][row["tag"]]["tags"]
                        except (LookupError, TypeError):
                            # tag not configured: keep sequence unchanged
                            tagseqs = ()
                        for t in tagseqs:
                            if row["sequence"].startswith(t):
                                row["sequence"] = row["sequence"][len(t):]
                                break
                    # store metadata and write fasta
                    name = row["primername"]
                    if name in primerseqs:
                        # a repeated primer must match its first occurrence
                        if row["sequence"] != primerseqs[name] or row["tag"] != primertags[name]:
                            log(name)
                            log(primerseqs[name])
                            log(primertags[name])
                            raise Exception("ImportFormattingError")
                    else:
                        outfh.write(">" + name + "\n")
                        outfh.write(row["sequence"] + "\n")
                    if row["primerset"]:
                        primersets[name].append(row["primerset"])
                    primertags[name] = row["tag"]
                    primerseqs[name] = row["sequence"]
        primerfile = MultiFasta(fastafile)
    else:
        primerfile = MultiFasta(inputfile)
        # set default tags for import
        for r in primerfile.references:
            primertags[r] = config["import"]["tag"]
    log("Placing primers on genome...")
    # Align primers to genome
    primers = primerfile.createPrimers(
        config["design"]["bowtieindex"],
        delete=False,
        tags=primertags,
        tmThreshold=config["design"]["mispriming"]["minimaltm"],
        endMatch=config["design"]["mispriming"]["identity3prime"],
    )  # places in genome
    # pair primers (by name or by primerset) MAKE COPIES!!!!
    pairs = {}
    for p in primers:
        # .get() leaves the defaultdict untouched for unknown names
        setnames = primersets.get(p.name) or [parsePrimerName(p.name)[0]]
        for setname in setnames:
            if setname not in pairs:
                try:
                    pairs[setname] = PrimerPair([None, None], name=setname)
                except Exception:
                    log(">>", primersets[p.name], "|", p.name, "|", setnames, "<")
                    raise
            pair = pairs[setname]
            # get primer orientation (might be wrong if guessed from name, will correct after)
            ## this basically just makes sure primers get paired (one fwd, one reverse)
            reverse = p.targetposition.reverse if p.targetposition else parsePrimerName(p.name)[1] < 0
            try:
                if reverse and pair[1] is None:
                    pair[1] = deepcopy(p)
                elif pair[0] is None:
                    pair[0] = deepcopy(p)
                elif pair[1] is None:
                    pair[1] = deepcopy(p)
                else:
                    # explicit raise: under -O an assert would disappear and
                    # the second primer would be overwritten silently
                    raise AssertionError("primer set %s already has two primers" % (setname,))
            except Exception:
                log("ERROR: Primer pair strand conflict?")
                log("PRIMER0", pair[0])
                log("PRIMER1", pair[1])
                log("REVERSE", reverse)
                log("SETNAME", setname)
                log("PRIMER", p.name, parsePrimerName(p.name))
                log("PAIRS", pair)
                raise
    # check if any unpaired primers (snapshot: entries are deleted in the loop)
    for k, v in list(pairs.items()):
        if not all(v):
            log("WARNING: primer set %s is incomplete and skipped" % k)
            del pairs[k]
    # prune ranks in primer3 mode (renames pair)
    if primer3:
        for p in pairs.values():
            # make sure target positions are set
            if not (p[0].targetposition and p[1].targetposition):
                raise AssertionError("primer pair %s has an unplaced primer" % (p.name,))
            p.pruneRanks()
        return list(pairs.values())
    # guess target if not set
    validPairs = []
    log("Identifying correct amplicons for unplaced primer pairs...")
    for p in pairs.values():
        if p[0].targetposition and p[1].targetposition:
            validPairs.append(p)
            continue
        amplicons = p.amplicons(config["import"]["ampliconsize"], autoreverse=True)
        if amplicons:
            # first shortest amplicon, as a stable sort by length would give
            shortest = min(amplicons, key=lambda x: len(x[2]))
            if len(amplicons) > 1:
                log(
                    "WARNING: multiple amplicons for {}. Assuming shortest ({}bp) is correct.".format(
                        p.name, str(len(shortest[2]))
                    )
                )
            p[0].targetposition = shortest[0]  # m
            p[1].targetposition = shortest[1]  # n
            validPairs.append(p)
            continue
        # try to find amplicon by sequence matching if no amplicons from genome mapping with sufficient Tm
        # (the former ``elif not primer3`` was always true at this point, so
        # its ``else`` branch was unreachable and has been dropped)
        refGenome = Genome(config["design"]["genome"])
        # get new loci (one round)
        newLoci = [[], []]
        for mapped, query in [[0, 1], [1, 0]]:
            for locus in p[mapped].loci:
                newLoci[query] += refGenome.primerMatch(locus, p[query].seq, config["import"]["ampliconsize"])
        if not newLoci[0] and not newLoci[1]:
            log(
                "WARNING: {} is not specific and not imported ({},{})".format(
                    p.name, len(p[0].loci), len(p[1].loci)
                )
            )
            continue
        # add new loci
        for i, loc in enumerate(newLoci):
            p[i].loci += loc
            p[i].loci = list(set(p[i].loci))  # remove redundancy
        # store new amplicon
        amplicons = p.amplicons(config["import"]["ampliconsize"], autoreverse=True)
        if amplicons:
            p[0].targetposition = amplicons[0][0]  # m
            p[1].targetposition = amplicons[0][1]  # n
            validPairs.append(p)
        else:
            log("\n".join(["-> " + str(locus) for locus in p[0].loci]))
            log("\n".join(["<- " + str(locus) for locus in p[1].loci]))
            log(
                "WARNING: Primer set {} has no valid amplicons ({},{})".format(
                    p.name, len(p[0].loci), len(p[1].loci)
                )
            )
    return validPairs