예제 #1
0
파일: readdoc.py 프로젝트: OlafLee/DPLP
def readdoc(rpath, fdocdict):
    """ Read every *.merge file in a folder and dump the result

    :type rpath: string
    :param rpath: folder scanned (non-recursively) for files whose
                  name ends with ".merge"

    :type fdocdict: string
    :param fdocdict: path of the gzip file the collected dict is
                     written to
    """
    files = [join(rpath, fname) for fname in listdir(rpath)
             if fname.endswith(".merge")]
    dr = DocReader()
    # Key: full path of the *.merge file; value: what DocReader.read returns
    docdict = {}
    for fmerge in files:
        docdict[fmerge] = dr.read(fmerge)
    # A parenthesised single-argument print behaves the same on
    # Python 2 and Python 3 (the bare statement form is py2-only)
    print('Write doc dict into {}'.format(fdocdict))
    # 'wb' is what gzip's 'w' already means; spelled out because the
    # payload is serialised binary data
    # NOTE(review): `dump` is presumably pickle's -- confirm in the imports
    with gzip.open(fdocdict, 'wb') as fout:
        dump(docdict, fout)
예제 #2
0
def readdoc(rpath, fdocdict):
    """ Collect all *.merge documents of a folder into one gzip dump

    :type rpath: string
    :param rpath: folder whose direct entries ending in ".merge"
                  are read

    :type fdocdict: string
    :param fdocdict: output path; the dict is written through
                     gzip.open
    """
    files = [
        join(rpath, fname) for fname in listdir(rpath)
        if fname.endswith(".merge")
    ]
    dr = DocReader()
    # Maps the full *.merge path to the object returned by dr.read
    docdict = {fmerge: dr.read(fmerge) for fmerge in files}
    # print(...) with one argument is valid on both Python 2 and 3,
    # unlike the py2-only print statement
    print('Write doc dict into {}'.format(fdocdict))
    # Explicit binary mode; identical to gzip's plain 'w'
    with gzip.open(fdocdict, 'wb') as fout:
        dump(docdict, fout)
예제 #3
0
def evalparser(path='./examples', report=False,
               bcvocab=None, draw=True,
               withdp=False, fdpvocab=None, fprojmat=None,
               fmodel="model/parsing-model.pickle.gz"):
    """ Test the parsing performance

    Every *.merge file directly inside ``path`` is parsed; the
    predicted brackets are written next to it as *.brackets.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score; when
                   True a *.dis file is expected beside each
                   *.merge file

    :param bcvocab: passed unchanged to ParsingModel.sr_parse

    :type draw: boolean
    :param draw: whether to render each predicted tree into a *.ps
                 file via drawrst

    :param withdp: forwarded to the ParsingModel constructor
    :param fdpvocab: forwarded to the ParsingModel constructor
    :param fprojmat: forwarded to the ParsingModel constructor

    :type fmodel: string
    :param fmodel: file given to ParsingModel.loadmodel; the default
                   is the path that used to be hard-coded
    """
    # ----------------------------------------
    # Load the parsing model
    # (parenthesised print works on both Python 2 and Python 3)
    print('Load parsing model ...')
    pm = ParsingModel(withdp=withdp,
        fdpvocab=fdpvocab, fprojmat=fprojmat)
    pm.loadmodel(fmodel)
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span','nuclearity','relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [joinpath(path, fname) for fname in listdir(path)
               if fname.endswith('.merge')]
    for fmerge in doclist:
        # ----------------------------------------
        # Read *.merge file
        dr = DocReader()
        doc = dr.read(fmerge)
        # ----------------------------------------
        # Parsing
        pred_rst = pm.sr_parse(doc, bcvocab)
        if draw:
            strtree = pred_rst.parse()
            drawrst(strtree, fmerge.replace(".merge",".ps"))
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        fbrackets = fmerge.replace('.merge', '.brackets')
        # Write brackets into file
        writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            fdis = fmerge.replace('.merge', '.dis')
            gold_rst = RSTTree(fdis, fmerge)
            gold_rst.build()
            # NOTE(review): the returned brackets were never used; the
            # call is kept in case bracketing() has side effects --
            # confirm and delete if it is pure
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
예제 #4
0
 def build(self):
     """ Build BINARY RST tree

     Reads the annotation text from self.fdis, turns it into a tree
     with buildtree, binarizes it, loads the document from
     self.fmerge and finally runs backprop with that document.
     Sets self.tree and self.doc.

     :raises IOError: if self.fmerge is not an existing file (the
                      check happens after self.fdis has been read)
     """
     # The with-block closes the *.dis handle deterministically
     # instead of leaving it to the garbage collector
     with open(self.fdis) as fin:
         text = fin.read()
     # Build RST as annotation
     self.tree = buildtree(text)
     # Binarize it
     self.tree = binarizetree(self.tree)
     # Read doc file
     if isfile(self.fmerge):
         dr = DocReader()
         self.doc = dr.read(self.fmerge)
     else:
         raise IOError("File doesn't exist: {}".format(self.fmerge))
     # Prop information from doc on the binarized RST tree
     self.tree = backprop(self.tree, self.doc)
예제 #5
0
파일: tree.py 프로젝트: OlafLee/DPLP
 def build(self):
     """ Build BINARY RST tree

     Steps, in order: read self.fdis, buildtree, binarizetree,
     read self.fmerge through DocReader, backprop. The results are
     stored in self.tree and self.doc.

     :raises IOError: when self.fmerge does not point to a file
     """
     # Use a context manager so the *.dis file is closed right after
     # reading (the bare open(...).read() leaked the handle)
     with open(self.fdis) as fdis:
         text = fdis.read()
     # Build RST as annotation
     self.tree = buildtree(text)
     # Binarize it
     self.tree = binarizetree(self.tree)
     # Read doc file
     if not isfile(self.fmerge):
         raise IOError("File doesn't exist: {}".format(self.fmerge))
     dr = DocReader()
     self.doc = dr.read(self.fmerge)
     # Prop information from doc on the binarized RST tree
     self.tree = backprop(self.tree, self.doc)
예제 #6
0
def eval_parser_unit(fmerge, bcvocab=None, pm=None, draw=False):
    """ Parse one *.merge file and write its predicted brackets

    :type fmerge: string
    :param fmerge: path of the *.merge file to parse; the output
                   goes to the same path with a ".brackets" suffix

    :param bcvocab: passed to pm.sr_parse; when None the
                    module-level ``global_bv`` is used instead

    :param pm: object providing sr_parse; when None the
               module-level ``global_pm`` is used instead

    :type draw: boolean
    :param draw: whether to render the predicted tree into a *.ps
                 file via drawrst
    """
    # Fall back to the module-level objects only when the caller did
    # not supply its own; the arguments used to be overwritten
    # unconditionally, which made them dead parameters
    if bcvocab is None:
        bcvocab = global_bv
    if pm is None:
        pm = global_pm
    assert bcvocab is not None
    assert pm is not None
    dr = DocReader()
    doc = dr.read(fmerge)
    # ----------------------------------------
    # Parsing
    pred_rst = pm.sr_parse(doc, bcvocab)
    if draw:
        strtree = pred_rst.parse()
        drawrst(strtree, fmerge.replace(".merge", ".ps"))
    # Get brackets from parsing results
    pred_brackets = pred_rst.bracketing()
    fbrackets = fmerge.replace('.merge', '.brackets')
    # Write brackets into file
    writebrackets(fbrackets, pred_brackets)
예제 #7
0
파일: main.py 프로젝트: fpt/webtoys
def diff_compare_files():
    """Handle the upload of two files and hand their lines to diff_compare.

    On POST, every entry of ``request.files`` whose key starts with
    ``'file'`` is collected. Exactly two uploads, each with a filename,
    are required. Both must be .docx files (read through ``DocReader``)
    or both must have a ``text/*`` content type (decoded as UTF-8 and
    split into lines). Every rejected request flashes a message and
    redirects to ``/diff``.

    NOTE(review): nothing in this part of the function handles
    non-POST requests -- confirm how GET is served.
    """
    def is_docx(upload):
        docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        return upload.filename.endswith(
            '.docx') and upload.content_type == docx_mime

    def is_text(upload):
        # content_type may be missing on a malformed upload; treat
        # that as "not text" instead of failing on None.startswith
        return (upload.content_type or '').startswith('text/')

    if request.method == 'POST':
        files = [v for k, v in request.files.items()
                 if k.startswith('file')]
        if len(files) != 2:
            flash('Not enough files')
            return redirect('/diff')
        for f in files:
            if not f.filename:
                flash('No selected file')
                return redirect('/diff')

        file1, file2 = files
        if is_docx(file1) and is_docx(file2):
            dr = DocReader()
            body1 = list(dr.process(file1.stream))
            body2 = list(dr.process(file2.stream))
        elif is_text(file1) and is_text(file2):
            body1 = file1.stream.read().decode("utf-8").splitlines()
            body2 = file2.stream.read().decode("utf-8").splitlines()
        else:
            flash('unsupported.')
            # Stop here: falling through would call diff_compare with
            # both bodies set to None
            return redirect('/diff')

        return diff_compare(file1.filename, body1, file2.filename, body2)