def ppsOut2Placements(ppsOutFile, scafContigFile=None):
    """
        Transforms PPS assignments to a list of pairs <contigName, assigned_ncbid>

        Each line of the PPS output is expected to start with the sequence name and to end with
        the assigned ncbid (optionally followed by spaces/tabs). A last column of "-1" is stored
        as ncbid 1. If a sequence name is a scaffold listed in the scaffold-contig mapping, one
        pair is emitted for each contig of that scaffold instead of a pair for the scaffold itself.

        @param ppsOutFile: PPS output file where the first column is the contig/scaffold name and the last column is ncbid
        @param scafContigFile: scaffold contig mapping (tab separated) if None then all sequences are considered as contigs

        @return: list of pairs (two-element lists) [contigName, assigned_ncbid], in the order of the input lines
        @raise ValueError: if the ncbid cannot be parsed from a line (the line is printed first)
        @raise Exception: whatever opening the file raises is re-raised after a message is printed
    """
    if scafContigFile is not None:
        scafToContigs = toScafContigMap(scafContigFile)
    else:
        scafToContigs = {}

    # Compiled once, outside of the per-line loop. The patterns are applied via sub() (not match())
    # on purpose: a line that does not match is passed through unchanged, e.g. a line that consists
    # only of a number is still accepted.
    namePattern = re.compile(r'^([^ \t]+)[ \t]+.*[0-9]+[ \t]*$')
    ncbidPattern = re.compile(r'^[^ \t]+.*[ \t]+([0-9]+)[ \t]*$')
    minusOnePattern = re.compile(r'^[^ \t]+.*[ \t]+(-1)[ \t]*$')

    outList = []
    try:
        f = open(os.path.normpath(ppsOutFile), 'r')
    except Exception:
        # single-argument print: same output as the print statement, valid in Python 2 and 3
        print("Cannot open file: %s" % (ppsOutFile,))
        raise

    # the context manager closes the file also when a line cannot be parsed
    with f:
        for lineCounter, line in enumerate(f, 1):
            line = common.noNewLine(line)
            name = namePattern.sub(r'\1', line)
            try:
                ncbid = int(ncbidPattern.sub(r'\1', line))
            except ValueError:
                # the last column is not a non-negative integer, the only other accepted value is "-1"
                try:
                    ncbid = abs(int(minusOnePattern.sub(r'\1', line)))
                except ValueError:
                    print('ppsOut2Placements: cannot parse placement for line nr: %s line: %s'
                          % (lineCounter, line))
                    raise

            if name in scafToContigs:
                # a scaffold: its assignment is propagated to all its contigs
                for contig in scafToContigs[name]:
                    outList.append([contig, ncbid])
            else:
                outList.append([name, ncbid])

    return outList
def ssd2Placements(ssdDir, scafContigFile=None):
    """
        Transforms sample specific data to placements. Sequences' names are not allowed to have gaps ' '

        All files in the directory that match "*.f[an][sa]" are read. The ncbid is taken from the file
        name, which has to end with "<ncbid>.<number>.f[an][sa]" where the ncbid is preceded by a
        non-digit character. Each line that starts with '>' names one sequence: all '>' characters are
        removed and the name is the text up to the first space or tab. If the name is a scaffold
        listed in the scaffold-contig mapping, its contigs are placed instead of the scaffold.
        A contig is placed only once, a repeated occurrence is reported on the standard output
        and skipped.

        @param ssdDir: directory that contains sample specific data
        @param scafContigFile: scaffold contig mapping (tab separated) if None then all sequences are considered as contigs

        @return: list of pairs (two-element lists) [contigName, assigned_ncbid]
        @raise ValueError: if the ncbid cannot be parsed from a file name (the file path is printed first)
        @raise Exception: whatever opening a file raises is re-raised after a message is printed
    """
    # collect map: scaffold -> list of contigs
    if scafContigFile is not None:
        scafToContigs = toScafContigMap(scafContigFile)
    else:
        scafToContigs = {}

    # compiled once, outside of the loops
    ncbidPattern = re.compile(r'^.*[^0-9]([0-9]+)\.[0-9]+\.f[an][sa]$')
    namePattern = re.compile(r'^([^ \t]+)[ \t]*.*$')

    outList = []
    placedContigs = set()

    for filePath in glob.glob(os.path.join(os.path.normpath(ssdDir), r'*.f[an][sa]')):
        try:
            # sub() returns the path unchanged if the file name does not fit the pattern,
            # in which case int() fails
            ncbid = int(ncbidPattern.sub(r'\1', filePath))
        except ValueError:
            # single-argument print: valid in Python 2 and 3
            print('ssd2Placements: cannot parse ncbid from file name: %s' % (filePath,))
            raise

        try:
            f = open(os.path.normpath(filePath), 'r')
        except Exception:
            print("Cannot open file: %s" % (filePath,))
            raise

        # the context manager closes each file before the next one is opened
        with f:
            for line in f:
                line = common.noNewLine(line)
                if not line.startswith('>'):
                    continue  # only the header lines carry sequence names

                name = namePattern.sub(r'\1', line.replace('>', ''))
                if name in scafToContigs:
                    contigsList = scafToContigs[name]
                else:
                    contigsList = [name]

                for contig in contigsList:
                    if contig in placedContigs:
                        print('contig "%s" has already been placed' % (contig,))
                    else:
                        placedContigs.add(contig)
                        outList.append([contig, ncbid])
        # TODO: count also BP for each contig

    return outList