Example #1
import os
import re
import sys

import stanfordhelper


def main():
    VERBOSE_OUTPUT = False
    NO_COREF = False
    segments = []
    files = []
    path = './'
    if len(sys.argv) >= 2:
        path = sys.argv[1]
        if not path.endswith('/'):
            path += '/'
        if not os.path.isdir(path):
            print "provide a folder with the txt files"
            sys.exit()
    # Read a list of text segments
    for fname in sorted(os.listdir(path)):
        if not fname.lower().endswith('.txt'): continue
        files.append(fname)
        # Replace quoted dialogue with the placeholder token QUOTE before parsing
        segments.append(re.sub(r"\".*?\"", "QUOTE", open(path + fname).read()))
    # Create and parse a document with all the segments
    text = '\n'.join(segments)
    doc = stanfordhelper.create_document_from_raw_text(text, {'cache': False})
    doc.compute_predictions()
    if NO_COREF: clear_coref(doc)
    if VERBOSE_OUTPUT:
        print doc
        print_adj_matrix(doc_to_graph(doc), doc)
    # Annotate segments as scenes and print the graph for each segment
    annotate_segments_as_scenes(doc, segments)
    for i, fname in enumerate(files):
        print fname
        print_adj_matrix(doc_to_graph(doc, filter_scene=i), doc)
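A minimal entry-point sketch for running the example above from the command line; the script filename in the comment is hypothetical:

if __name__ == '__main__':
    main()  # e.g. python scene_graphs.py path/to/folder_with_txt_files/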
Example #2
def demo2():
    text = "One morning, Bob met Alice for brunch but she didn't eat her food."
    doc = stanfordhelper.create_document_from_raw_text(text)
    print doc
    doc.compute_predictions()
    g = doc_to_graph(doc)
    print_adj_matrix(g)
Example #3
def call_voz(text,
             doc_source='stanfordhelper',
             doc_method='create_document_from_raw_text',
             options={}):
    doc = stanfordhelper.create_document_from_raw_text(text)
    options = dict(get_default_options(), **options)
    return formatter.html(
        formatter.VozHTMLFormatter.format(doc, options=options))
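A hypothetical invocation of the call_voz() helper defined above; the input sentence is arbitrary, and the return value is the HTML string produced by formatter.html():

html = call_voz("One morning, Bob met Alice for brunch.")
print html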
Example #4
def demo():
    logging.basicConfig(level=logging.DEBUG)
    settings.SERIALIZE_PRETTY_JSON_BY_DEFAULT = True
    file_path = "/Users/josepvalls/voz2/data/"
    story_file = "TestInput.txt"
    text = open(file_path + story_file).read()
    doc = stanfordhelper.create_document_from_raw_text(text)
    print doc
    doc.compute_predictions()
    print_adj_matrix(doc_to_graph(doc))
Example #5
def main():
    logging.basicConfig(level=logging.DEBUG)
    settings.SERIALIZE_PRETTY_JSON_BY_DEFAULT = True
    #doc = stanfordhelper.create_document_from_raw_text("The master of the ship found a box of jewels containing diamonds, silver and gold which were lost by a widower with his daughter and small son who lived in the hollow of his hand.")
    doc = stanfordhelper.create_document_from_raw_text(
        "Alice, Bob and Charlie are friends and they have lots of fun and it is nice."
    )
    print [(i, i.get_text()) for i in doc.sentences[0].mentions if i.is_independent]
    print [(i, i.get_text()) for i in doc.sentences[0].mentions]
    print [i.format({'display_tokens': ['lemma', 'pos', 'idx']}) for i in doc.sentences[0].tokens]
Example #6
    def post(self):
        try:
            # Copy form parameters into the matching uppercase settings attributes.
            for k in dir(settings):
                if k == 'OPTIONS': continue
                if not k.upper() == k: continue
                try:
                    if isinstance(getattr(settings, k), bool):
                        setattr(settings, k, self.request.get(k) == 'True')
                    elif k and isinstance(getattr(settings, k), int):
                        setattr(settings, k, int(self.request.get(k)))
                    else:
                        setattr(settings, k, self.request.get(k))
                except Exception as e:
                    self.response.write(
                        'Error setting parameters in the default form: ' + str(e))
                    return
            text = self.request.get('text')
            if self.request.get_all('story'):
                body = 'Not supported yet'
            else:
                body = ''
                document = stanfordhelper.create_document_from_raw_text(
                    text, {'cache': False})
                document.compute_predictions()
                body += formatter.VozHTMLFormatter.format(
                    document,
                    options={
                        'include_parse': True,
                        'parse_highlight': parse_tree_mention_helper.HIGHLIGHT_MENTIONS_INDEPENDNET,
                        'include_raw': False,
                        'include_text': True,
                        'text_highlight': parse_tree_mention_helper.HIGHLIGHT_MENTIONS_INDEPENDNET,
                        'include_mentions': False,
                        'include_verbs': True,
                    })
                g = graphhelper.doc_to_graph(document)
                body += formatter.VozHTMLFormatter.format_story_graph(g)
                body = formatter.html(body)
        except Exception as e:
            # body = str(e)
            body = '<pre>' + traceback.format_exc() + '</pre>'
        self.response.write(body)
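A hedged sketch of how a handler like the one above might be registered; the self.request/self.response usage suggests webapp2, and the class name VozDemoHandler and the /voz route are assumptions:

import webapp2

class VozDemoHandler(webapp2.RequestHandler):
    # Hypothetical handler class that would hold the post() method shown above.
    pass

app = webapp2.WSGIApplication([('/voz', VozDemoHandler)], debug=True)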
Example #7
import os
import re
import sys

import stanfordhelper


def main():
    NO_COREF = False
    filename = None
    if len(sys.argv) >= 2:
        filename = sys.argv[1]
        if not os.path.isfile(filename):
            print "Provide a path to a .txt file"
            sys.exit()
    else:
        print "Provide a path to a .txt file"
        sys.exit()

    text = re.sub(r"\".*?\"", "QUOTE", open(filename).read())
    doc = stanfordhelper.create_document_from_raw_text(text, {'cache': False})
    doc.compute_predictions()
    if NO_COREF: clear_coref(doc)
    print_verb_sequence(doc)
Example #8
    def post(self, default=False):
        # try:
        if True:
            if not default:
                params = json.loads(self.request.body)
            else:
                params = {}
            text = params.get('text', BASE_TEXT)
            cmd = params.get('cmd',
                             '[i.get_text() for i in document.sentences]')
            document = stanfordhelper.create_document_from_raw_text(
                text, {'cache': False})
            # Evaluate the requested expression against the parsed document
            # (debugging endpoint; eval runs arbitrary code from the request).
            resp = eval(cmd)
            # except Exception as e:
            # import traceback
            # resp = '['+str(traceback.format_exc())+']'
            # resp = str(e)
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write(
            json.dumps(resp, sort_keys=True, indent=2,
                       cls=JSONDateTimeEncoder))
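A sketch of the JSON body this handler expects, based on the keys it reads; the text is arbitrary and the cmd value is the handler's own default expression:

import json

payload = json.dumps({
    'text': 'One morning, Bob met Alice for brunch.',
    'cmd': '[i.get_text() for i in document.sentences]',
})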
Example #9
def create_document_from_story_data(story_data, properties={}, annotate=True):
    import stanfordhelper
    raw_text, character_mentions, character_roles, story_data, functions = story_stats(
        properties['story_id'], story_data)
    import entitymanager
    stats_not_found = 0
    stats_ambiguous = 0
    stats_match_ok = 0
    doc = stanfordhelper.create_document_from_raw_text(raw_text, properties)
    # TODO clean Stanford stuff
    for row in story_data:
        function, text, annotations, offset_start, offset_end = row
        raw_text += text + ' '
        for annotation in annotations:
            verb, msubj, mobj = annotation
            vargs = None
            try:
                token = doc.get_token_by_off(verb[2])
                vargs = doc.get_verb_by_token_id(token.id).arguments
            except Exception:
                pass
            if vargs is not None:
                if msubj:
                    key, neg, o_start, o_end, extra = msubj
                    token = doc.get_token_by_off(o_start)
                    mention = doc.get_mention_by_token_id(token.id)
                    if mention:
                        vargs['nsubj'] = mention.tokens
                if mobj:
                    key, neg, o_start, o_end, extra = mobj
                    token = doc.get_token_by_off(o_start)
                    mention = doc.get_mention_by_token_id(token.id)
                    if mention:
                        vargs['dobj'] = mention.tokens

    # Annotate coref and roles
    for character_id, character in enumerate(character_mentions):
        for mention_data in character_mentions[character]:
            token = doc.get_token_by_off(mention_data[1])
            mention = doc.get_mention_by_token_id(token.id)
            #print mention_data,token,mention,character_id
            if mention:
                mention.is_independent = True
                mention.annotations.coref = character_id + 1000
                mention.annotations.character = True
                mention.annotations.type = 'animate'
                if character_roles.get(character, None):
                    mention.annotations.role = synthetic_dataset_roles.get(
                        mention_data[0], 'Other')
                    # TODO set the rest of mentions to NA
                if annotate:
                    mention.predictions.coref = character_id + 1000
                    mention.predictions.character = True
                    mention.annotations.type = 'animate'
                    if character_roles.get(character, None):
                        mention.predictions.role = synthetic_dataset_roles.get(
                            mention_data[0], 'Other')
                        # TODO set the rest of mentions to NA
    # Annotate functions
    for function, offset_start, offset_end in functions:
        doc.narrative.add_function(
            doc.get_new_id("Function"), offset_start,
            offset_end - offset_start, function, [
                narrativehelper.NarrativeFunctionLocation(
                    'ACTUAL', [
                        i.id for i in doc.get_tokens_by_off_len(
                            offset_start, offset_end)
                    ])
            ])
    return doc
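A hedged call sketch for create_document_from_story_data() above; the story_data value comes from the synthetic dataset loader (not shown) and the story id is a placeholder, so only the call shape and the required 'story_id' property are illustrated:

doc = create_document_from_story_data(story_data, properties={'story_id': 1}, annotate=True)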