Example #1
0
 def erd(actual, predicted, delimiter='\t', idcol=0, catcol=2, NumRP=0):
     """Score the categories in one file against those in another.

     File format: ...<id>...<category>...

     Both files are loaded with ``util.read_multimap``, using column
     ``idcol`` as the key and column ``catcol`` as the value.  Every id
     found in ``actual`` is scored against the predicted categories for
     the same id (an empty set when the id is absent from ``predicted``);
     every id found only in ``predicted`` is scored against an empty set
     of actual categories.

     Args:
         actual: path of the file holding the reference categories.
         predicted: path of the file holding the predicted categories.
         delimiter: column separator passed to ``util.read_multimap``.
         idcol: index of the id column (same in both files).
         catcol: index of the category column (same in both files).
         NumRP: forwarded unchanged to ``MuLabCat``.

     Returns:
         The ``MuLabCat`` instance that accumulated all comparisons.
     """
     mlc = MuLabCat(util.title_from_2paths(actual, predicted),
                    reassign=False, NumRP=NumRP)
     adict = util.read_multimap(actual, delimiter, col1=idcol, col2=catcol,
                                logger=MuLabCat.logger)
     pdict = util.read_multimap(predicted, delimiter, col1=idcol, col2=catcol,
                                logger=MuLabCat.logger)
     # items() instead of iteritems(): same pairs on Python 2, and it is the
     # only spelling that exists on Python 3.
     # NOTE(review): assumes read_multimap returns a dict-like mapping of
     # id -> set of categories -- confirm against util.
     for obj, acat in adict.items():
         mlc.add(acat, pdict.get(obj, frozenset()))
     # Ids that were predicted but never appear in the reference file.
     for obj, pcat in pdict.items():
         if obj not in adict:
             mlc.add(frozenset(), pcat)
     return mlc
Example #2
0
 def score(actual, predicted, delimiter='\t', abeg=1, pbeg=1, NumRP=0):
     """Score one file against another.

     File format: <query string>[<delimiter><category>]+
     The queries in both files must be identical and in the same order;
     the files are read in lockstep, row by row.

     Args:
         actual: path of the file holding the reference categories.
         predicted: path of the file holding the predicted categories.
         delimiter: column separator used by both files.
         abeg: index of the first category column in ``actual``.
         pbeg: index of the first category column in ``predicted``.
         NumRP: forwarded unchanged to ``MuLabCat``.

     Returns:
         The ``MuLabCat`` instance that accumulated all comparisons.

     Raises:
         ValueError: if the files have a different number of rows, or if
             the first column (the query string) of a row differs
             between the two files.
     """
     # izip_longest (Python 2) was renamed zip_longest in Python 3; pick
     # whichever this interpreter provides.
     zip_longest = getattr(itertools, 'zip_longest', None)
     if zip_longest is None:
         zip_longest = itertools.izip_longest
     mlc = MuLabCat(util.title_from_2paths(actual, predicted), NumRP=NumRP)
     util.reading(actual, MuLabCat.logger)
     util.reading(predicted, MuLabCat.logger)
     # Empty cells (e.g. from trailing delimiters) are not categories.
     empty = frozenset([''])
     with open(actual) as af, open(predicted) as pf:
         rows = zip_longest(csv.reader(af, delimiter=delimiter),
                            csv.reader(pf, delimiter=delimiter))
         for sa, sp in rows:
             # The shorter file is padded with None once it runs out.
             if sa is None or sp is None:
                 raise ValueError("uneven files", af, pf, sa, sp)
             if sa[0] != sp[0]:
                 raise ValueError("query string mismatch", sa, sp)
             mlc.add(frozenset(sa[abeg:]) - empty,
                     frozenset(sp[pbeg:]) - empty)
     return mlc
Example #3
0
 def score(actual, predicted, delimiter='\t', abeg=1, pbeg=1, NumRP=0):
     """Score one file against another.

     File format: <query string>[<delimiter><category>]+
     The queries in both files must be identical.
     abeg and pbeg are the columns where actual and predicted categories
     start.

     The two files are read row by row in lockstep.  For each row pair the
     set of non-empty cells from column ``abeg`` onward in ``actual`` and
     from column ``pbeg`` onward in ``predicted`` is passed to
     ``MuLabCat.add``.  ``NumRP`` is forwarded unchanged to ``MuLabCat``.

     Returns the populated ``MuLabCat`` instance.

     Raises ValueError when one file has more rows than the other, or when
     the first column of a row differs between the files.
     """
     try:
         # Python 3 name.
         zip_longest = itertools.zip_longest
     except AttributeError:
         # Python 2 name for the same function.
         zip_longest = itertools.izip_longest
     mlc = MuLabCat(util.title_from_2paths(actual, predicted), NumRP=NumRP)
     util.reading(actual, MuLabCat.logger)
     util.reading(predicted, MuLabCat.logger)
     # Subtracted from each row's cells so that '' is never a category.
     empty = frozenset([''])
     with open(actual) as af, open(predicted) as pf:
         for sa, sp in zip_longest(
                 csv.reader(af, delimiter=delimiter),
                 csv.reader(pf, delimiter=delimiter)):
             # A None on either side means that file ended first.
             if sa is None or sp is None:
                 raise ValueError("uneven files", af, pf, sa, sp)
             if sa[0] != sp[0]:
                 raise ValueError("query string mismatch", sa, sp)
             mlc.add(
                 frozenset(sa[abeg:]) - empty,
                 frozenset(sp[pbeg:]) - empty)
     return mlc
Example #4
0
 def erd(actual, predicted, delimiter='\t', idcol=0, catcol=2, NumRP=0):
     """Score one file against another.

     File format: ...<id>...<category>...

     ``idcol`` and ``catcol`` are the column indexes of the id and the
     category; they and ``delimiter`` are handed to ``util.read_multimap``
     for both files.  Ids present in ``actual`` are compared with the
     prediction for the same id, or with an empty set when there is none.
     Ids present only in ``predicted`` are compared against an empty
     actual set.  ``NumRP`` is forwarded unchanged to ``MuLabCat``.

     Returns the populated ``MuLabCat`` instance.
     """
     mlc = MuLabCat(util.title_from_2paths(actual, predicted),
                    reassign=False,
                    NumRP=NumRP)
     adict = util.read_multimap(actual,
                                delimiter,
                                col1=idcol,
                                col2=catcol,
                                logger=MuLabCat.logger)
     pdict = util.read_multimap(predicted,
                                delimiter,
                                col1=idcol,
                                col2=catcol,
                                logger=MuLabCat.logger)
     # dict.iteritems() is Python 2 only; items() iterates the same pairs
     # on both Python 2 and Python 3.
     # NOTE(review): presumes read_multimap returns a dict-like object --
     # confirm against util.
     for obj, acat in adict.items():
         mlc.add(acat, pdict.get(obj, frozenset()))
     # Second pass: predictions whose id has no reference entry at all.
     for obj, pcat in pdict.items():
         if obj not in adict:
             mlc.add(frozenset(), pcat)
     return mlc