コード例 #1
0
    def read(self, f, models, weights):
        """Load grammar rules from *f* and add each one via self.add.

        f is either a path (str) or an already-open iterable of rule lines.
        For a path, the plain file is tried first and then "<path>.gz",
        which is decompressed on the fly.  Each line is parsed with
        rule.rule_from_line; a line that fails to parse is logged as a
        warning and skipped.  Every parsed rule is passed to self.add
        together with the value of estimate_rule(r, models, weights).

        Files opened here are closed before returning; an iterable supplied
        by the caller is left open.

        Raises IOError if f is a path and neither f nor f + ".gz" is a file.
        """
        opened = []  # handles created here, so that only those get closed
        if isinstance(f, str):
            if os.path.isfile(f):
                if log.level >= 1:
                    log.write("Reading grammar from %s...\n" % f)
                f = open(f, 'r', 4*1024*1024)
                opened.append(f)
            elif os.path.isfile("%s.gz" % f):
                f = "%s.gz" % f
                if log.level >= 1:
                    log.write("Decompressing grammar from %s...\n" % f)
                # Compressed data has to be read in binary mode.
                raw = open(f, 'rb', 4*1024*1024)
                opened.append(raw)
                f = gzip.GzipFile(fileobj=raw)
                opened.append(f)
            else:
                # Without this, the loop below would iterate over the
                # characters of the path string itself.
                raise IOError("grammar file not found: %s (also tried %s.gz)"
                              % (f, f))
        else:
            if log.level >= 1:
                log.write("Reading grammar...\n")

        try:
            for line in f:
                try:
                    r = rule.rule_from_line(line)
                except Exception:
                    log.write("warning: couldn't scan rule %s\n" % line.strip())
                    continue
                # Keep parsing and adding as separate statements: the
                # one-liner self.add(rule.rule_from_line(line)) once caused
                # a segfault.
                estcost = estimate_rule(r, models, weights)
                self.add(r, estcost)
        finally:
            # Reverse order: the GzipFile wrapper first, then the raw file
            # beneath it (GzipFile.close() does not close its fileobj).
            for handle in reversed(opened):
                handle.close()
        log.write("%d rules read\n" % self.count)
コード例 #2
0
ファイル: scorer.py プロジェクト: fullstackenviormentss/sbmt
def read_rules(files):
    """Merge several grammar files together (assuming they are sorted).

    files is a sequence of iterables of lines of the form
    "<handle>|||<rule>".  Lines are consumed in sorted order across all
    inputs, and every line that parses is yielded as a (handle, r) pair,
    where r is the result of rule.rule_from_line on the part after the
    first "|||".

    Lines that cannot be split or parsed are reported on stderr and
    skipped.  When more than one input is given, rules whose scores list
    is empty are reported on stderr and skipped as well.
    """
    # NOTE(review): the single-file path has never applied the score check;
    # that behaviour is preserved here -- confirm whether it is intentional.
    check_scores = len(files) != 1

    # heapq.merge performs the lazy k-way merge and breaks ties between
    # equal lines by input position instead of comparing the file objects.
    for line in heapq.merge(*files):
        try:
            (handle, ruleline) = line.split("|||", 1)
            r = rule.rule_from_line(ruleline)
        except Exception:
            sys.stderr.write("couldn't scan line: %s\n" % line.strip())
            continue

        if check_scores and len(r.scores) < 1:
            sys.stderr.write("rule doesn't have enough scores: %s\n" % str(r))
            continue

        # The yield stays outside the try block so that GeneratorExit
        # (raised here when the consumer closes the generator early) is
        # never mistaken for a parse failure.
        yield handle, r
コード例 #3
0
ファイル: scorer.py プロジェクト: jungikim/sbmt
def read_rules(files):
    """Merge several grammar files together (assuming they are sorted).

    files is a sequence of iterables of lines of the form
    "<handle>|||<rule>".  Lines are consumed in sorted order across all
    inputs, and every line that parses is yielded as a (handle, r) pair,
    where r is the result of rule.rule_from_line on the part after the
    first "|||".

    Lines that cannot be split or parsed are reported on stderr and
    skipped.  When more than one input is given, rules whose scores list
    is empty are reported on stderr and skipped as well.
    """
    # NOTE(review): the single-file path has never applied the score check;
    # that behaviour is preserved here -- confirm whether it is intentional.
    check_scores = len(files) != 1

    # heapq.merge performs the lazy k-way merge and breaks ties between
    # equal lines by input position instead of comparing the file objects.
    for line in heapq.merge(*files):
        try:
            (handle, ruleline) = line.split("|||", 1)
            r = rule.rule_from_line(ruleline)
        except Exception:
            sys.stderr.write("couldn't scan line: %s\n" % line.strip())
            continue

        if check_scores and len(r.scores) < 1:
            sys.stderr.write("rule doesn't have enough scores: %s\n" % str(r))
            continue

        # The yield stays outside the try block so that GeneratorExit
        # (raised here when the consumer closes the generator early) is
        # never mistaken for a parse failure.
        yield handle, r
コード例 #4
0
        
    return ll(c12,c1,p) + ll(c2-c12,n-c1,p) - ll(c12,c1,p1) - ll(c2-c12,n-c1,p2)

# Script entry point.  Takes two command-line arguments (the paths of the
# "f" and "e" weight files) and processes rules read from stdin, one per line.
if __name__ == "__main__":
    import rule

    # Threshold handed to read_weightfile for both tables.
    threshold = 1e-8
    fweightfile = sys.argv[1]
    eweightfile = sys.argv[2]

    fweighttable = read_weightfile(file(fweightfile), threshold=threshold)
    eweighttable = read_weightfile(file(eweightfile), threshold=threshold)

    progress = 0
    for line in sys.stdin:
        r = rule.rule_from_line(line)
        # Rules without word alignments: append two copies of the first
        # score, write the rule to stdout, and move on to the next line.
        # NOTE(review): presumably the two copies stand in for the two
        # weights computed below for aligned rules -- confirm.
        if r.word_alignments is None:
            scores = r.scores
            scores.extend([scores[0],scores[0]])
            r.scores = scores
            sys.stdout.write("%s\n" % r.to_line())
            progress += 1
            continue
        
        # De-duplicated collection of the rule's word alignments.
        align = set(r.word_alignments)

        # Both running weights start at 1.0.
        fweight = eweight = 1.0
        
        # Visit every position of r.f, skipping symbols that sym.isvar
        # reports as variables.
        for fi in xrange(len(r.f)):
            if not sym.isvar(r.f[fi]):
                fwordweight = 0.
コード例 #5
0
        c2 - c12, n - c1, p2)


# Script entry point.  Takes two command-line arguments (the paths of the
# "f" and "e" weight files) and processes rules read from stdin, one per line.
if __name__ == "__main__":
    import rule

    # Threshold handed to read_weightfile for both tables.
    threshold = 1e-8
    fweightfile = sys.argv[1]
    eweightfile = sys.argv[2]

    fweighttable = read_weightfile(file(fweightfile), threshold=threshold)
    eweighttable = read_weightfile(file(eweightfile), threshold=threshold)

    progress = 0
    for line in sys.stdin:
        r = rule.rule_from_line(line)
        # Rules without word alignments: append two copies of the first
        # score, write the rule to stdout, and move on to the next line.
        # NOTE(review): presumably the two copies stand in for the two
        # weights computed below for aligned rules -- confirm.
        if r.word_alignments is None:
            scores = r.scores
            scores.extend([scores[0], scores[0]])
            r.scores = scores
            sys.stdout.write("%s\n" % r.to_line())
            progress += 1
            continue

        # De-duplicated collection of the rule's word alignments.
        align = set(r.word_alignments)

        # Both running weights start at 1.0.
        fweight = eweight = 1.0

        # Visit every position of r.f, skipping symbols that sym.isvar
        # reports as variables.
        for fi in xrange(len(r.f)):
            if not sym.isvar(r.f[fi]):
                fwordweight = 0.