Python funornone Examples

Programming Language: Python

Namespace/Package Name: lputil

Method/Function: funornone

Examples at hotexamples.com: 3

Python funornone - 3 examples found. These are the top rated real world Python examples of lputil.funornone extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: extract_entity_annotation.py Project: isi-nlp/elisatools

 for xann in xdoc.findall("ANNOTATION"):
   annid = xann.get("id")
   annset[annid] = xann
   anntask = xann.get("task")
   if xann.find("EXTENT") is None:
     if anntask != "FE":
       sys.stderr.write("Warning: extent-free non-full annotation in "+annfile+": "+ET.tostring(xann)+"\n")
       continue
     continue
   xextent = xann.find("EXTENT")
   tup = [anntask, docid, xextent.get("start_char") or "None", xextent.get("end_char") or "None", annid or "None", xextent.text or "None"]
   if anntask == "NE": # simple ne annotation
     tup.append(xann.get("type"))
   else: # everything else has category/tag style
     try:
       tup.append(funornone(xann.find("CATEGORY"), lambda x: x.text))
       tup.append(funornone(xann.find("TAG"), lambda x: x.text))
       if (anntask == "FE"):
         if xann.find("ENTITY") is not None:
           eid = xann.find("ENTITY").get("entity_id")
           tup.append(eid)
           tup.append(annset[eid].find("TAG").text if annset[eid].find("TAG") is not None else "NONE")
         elif xann.find("PHRASE") is not None:
           tup.append(xann.find("PHRASE").get("phrase_id"))
         else:
           sys.stderr("Expected ENTITY or PHRASE at "+annfile+"; "+ET.tostring(xann))
           continue
       elif (anntask == "SSA"):
         if xann.find("PREDICATE") is not None:
           tup.append(xann.find("PREDICATE").get("predicate_id"))
       else:

Example #2

Show file

File: extract_entity_annotation.py Project: panx27/elisatools

def main():
  """Extract LAF annotations below an LRLP root and write them as TSV rows.

  Command-line options:
    --rootdir / -r   root lrlp dir; annotations are read from
                     <rootdir>/data/annotation (default ".").
    --outfile / -o   path of the file the rows are written to.
    --extwtdir / -et directory holding extracted tweet "<docid>.rsd.txt"
                     files, used to recover the text of SN_TWT_ documents.

  Every ANNOTATION element that has an EXTENT yields one tab-separated row:
  task, docid, start_char, end_char, annotation id, extent text, followed
  by task-specific columns (type/TAG for NE and NPC; CATEGORY, TAG and
  cross-reference ids for FE and SSA).

  Exits with status 0 (after a message on stderr) when the annotation
  directory does not exist.
  """
  parser = argparse.ArgumentParser(description="Extract and print laf annotat" \
                                   "ion data from LRLP in a form that is amen" \
                                   "able to insertion into future xml",
                                   formatter_class= \
                                   argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument("--rootdir", "-r", default=".", help="root lrlp dir")
  parser.add_argument("--outfile", "-o", help="where to write")
  parser.add_argument("--extwtdir", "-et", help="extracted tweet rsd files dir")

  try:
    args = parser.parse_args()
  except IOError as msg:
    parser.error(str(msg))

  twtdir = args.extwtdir
  anndir = os.path.join(args.rootdir, 'data', 'annotation')
  # The context manager guarantees the output is flushed and closed on every
  # exit path, including the sys.exit() below and re-raised errors.
  with open(args.outfile, 'w') as outfile:
    if not os.path.exists(anndir):
      sys.stderr.write("No annotation directory found\n")
      sys.exit(0)
    for annfile in recursive_file_gen(anndir):
      # Only visible (non dot-prefixed) laf.xml files are of interest.
      if not annfile.endswith("laf.xml") or \
         os.path.basename(annfile).startswith("."):
        continue
      try:
        xobj = ET.parse(annfile)
      except Exception:
        sys.stderr.write("Problem parsing "+annfile+"\n")
        continue

      for xdoc in xobj.findall("DOC"):
        docid = xdoc.get("id")
        if docid.startswith('doc-'): # In NPC annotation, LDC uses "doc-n"
                                     # instead of original docid
          docid = os.path.basename(annfile).replace('.laf.xml', '')
        is_tweet = docid.startswith('SN_TWT_')
        twtfile = '%s/%s.rsd.txt' % (twtdir, docid)
        # No string head for TWT, need rsd file; skip the doc without one
        if is_tweet and not os.path.isfile(twtfile):
          continue

        # Store all annotations by id. if they have an extent, spit them out.
        # if no extent, check they are entities; nothing else should be
        # extent-free PREDICATE, ENTITY, and PHRASE are cross references to ids;
        # for ENTITY the core type is copied, for everything else just the cross
        # reference
        annset = {}
        for xann in xdoc.findall("ANNOTATION"):
          annid = xann.get("id")
          if annid.startswith('doc-'):
            annid = re.sub(r'doc-\d+', docid, annid)
          annset[annid] = xann
          anntask = xann.get("task")
          xextent = xann.find("EXTENT")
          if xextent is None:
            if anntask != "FE" and anntask != "SSA":
              # encoding="unicode" yields text rather than bytes, so the
              # concatenation below cannot fail with a TypeError.
              sys.stderr.write("Warning: extent-free non-full annotation in " \
                               +annfile+": " \
                               +ET.tostring(xann, encoding="unicode")+"\n")
            continue
          # map aberrant type
          if anntask == "NPchunk":
            anntask = "NPC"
          if is_tweet: # No string head for TWT: slice it out of the rsd text
            with open(twtfile) as twtin:
              tweet = twtin.read()
            beg = int(xextent.get("start_char"))
            end = int(xextent.get("end_char"))
            # but don't go negative
            if beg < 0 or end > len(tweet):
              sys.stderr.write(annfile+" Bad offsets: can't do %d, %d on %s\n" % (beg, end, docid))
              continue
            # end+1 because the slice's upper bound is exclusive
            strhead = tweet[beg:end+1]
          else:
            strhead = xextent.text
          tup = [anntask, docid, xextent.get("start_char") or "None",
                 xextent.get("end_char") or "None", annid or "None",
                 strhead or "None"]
          if anntask in ("NE", "NPC"): # Simple ne annotation / NP chunking
            # old style: in attributes. new style: in tag
            if "type" in xann.keys():
              tup.append(xann.get("type"))
            else:
              tup.append(funornone(xann.find("TAG"), lambda x: x.text))
          else: # Everything else has category/tag style
            try:
              tup.append(funornone(xann.find("CATEGORY"), lambda x: x.text))
              tup.append(funornone(xann.find("TAG"), lambda x: x.text))
              if anntask == "FE":
                xentity = xann.find("ENTITY")
                xphrase = xann.find("PHRASE")
                if xentity is not None:
                  eid = xentity.get("entity_id")
                  tup.append(eid)
                  # Copy the core type from the referenced annotation.
                  xreftag = annset[eid].find("TAG")
                  tup.append(xreftag.text if xreftag is not None else "NONE")
                elif xphrase is not None:
                  tup.append(xphrase.get("phrase_id"))
                else:
                  sys.stderr.write("Expected ENTITY or PHRASE at " \
                                   +annfile+"; " \
                                   +ET.tostring(xann, encoding="unicode")+"\n")
                  continue
              elif anntask == "SSA":
                xpred = xann.find("PREDICATE")
                if xpred is not None:
                  tup.append(xpred.get("predicate_id"))
              else:
                sys.stderr.write(annfile+": Don't know how to process "\
                                 +anntask+"\n")
                continue
            except Exception:
              # Show which annotation broke before propagating the error.
              print(annfile)
              print(ET.tostring(xann))
              raise
          try:
            outfile.write("\t".join(map(str,tup))+"\n")
          except UnicodeError:
            # Covers both encode and decode failures while writing the row.
            sys.stderr.write("Warning: Unknown encoding %s:%s-%s\n" % \
                             (tup[4], tup[2], tup[3]))

Example #3

Show file

File: extract_entity_annotation.py Project: afcarl/elisatools

def main():
    """Extract LAF annotations below an LRLP root and write them as TSV rows.

    Command-line options:
      --rootdir / -r   root lrlp dir; annotations are read from
                       <rootdir>/data/annotation (default ".").
      --outfile / -o   path of the file the rows are written to.
      --extwtdir / -et directory holding extracted tweet "<docid>.rsd.txt"
                       files; when absent or missing on disk, extent text is
                       taken from the annotation itself for every document.

    Every ANNOTATION element that has an EXTENT yields one tab-separated
    row: task, docid, start_char, end_char, annotation id, extent text,
    followed by task-specific columns (type/TAG for NE and NPC; CATEGORY,
    TAG and cross-reference ids for FE and SSA).

    Exits with status 0 (after a message on stderr) when the annotation
    directory does not exist.
    """
    parser = argparse.ArgumentParser(description="Extract and print laf annotat" \
                                     "ion data from LRLP in a form that is amen" \
                                     "able to insertion into future xml",
                                     formatter_class= \
                                     argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--rootdir", "-r", default=".", help="root lrlp dir")
    parser.add_argument("--outfile", "-o", help="where to write")
    parser.add_argument("--extwtdir",
                        "-et",
                        default=None,
                        help="extracted tweet rsd files dir")

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    twtdir = args.extwtdir
    anndir = os.path.join(args.rootdir, 'data', 'annotation')
    # The context manager guarantees the output is flushed and closed on
    # every exit path, including the sys.exit() below and re-raised errors.
    with open(args.outfile, 'w') as outfile:
        if not os.path.exists(anndir):
            sys.stderr.write("No annotation directory found\n")
            sys.exit(0)
        if twtdir is not None and not os.path.exists(twtdir):
            sys.stderr.write("Warning: no {}\n".format(twtdir))
            twtdir = None
        for annfile in recursive_file_gen(anndir):
            # Only visible (non dot-prefixed) laf.xml files are of interest.
            if not annfile.endswith("laf.xml") or \
               os.path.basename(annfile).startswith("."):
                continue
            try:
                xobj = ET.parse(annfile)
            except Exception:
                sys.stderr.write("Problem parsing " + annfile + "\n")
                continue

            for xdoc in xobj.findall("DOC"):
                docid = xdoc.get("id")
                if docid.startswith('doc-'):
                    # In NPC annotation, LDC uses "doc-n" instead of the
                    # original docid
                    docid = os.path.basename(annfile).replace('.laf.xml', '')
                # No string head for TWT, need rsd file
                use_tweet = is_sn(docid) and twtdir is not None
                twtfile = '%s/%s.rsd.txt' % (twtdir, docid)
                if use_tweet and not os.path.isfile(twtfile):
                    continue

                # Store all annotations by id. if they have an extent, spit them out.
                # if no extent, check they are entities; nothing else should be
                # extent-free PREDICATE, ENTITY, and PHRASE are cross references to ids;
                # for ENTITY the core type is copied, for everything else just the cross
                # reference
                annset = {}
                for xann in xdoc.findall("ANNOTATION"):
                    annid = xann.get("id")
                    if annid.startswith('doc-'):
                        annid = re.sub(r'doc-\d+', docid, annid)
                    annset[annid] = xann
                    anntask = xann.get("task")
                    xextent = xann.find("EXTENT")
                    if xextent is None:
                        if anntask != "FE" and anntask != "SSA":
                            # encoding="unicode" yields text rather than
                            # bytes, so the concatenation cannot fail with
                            # a TypeError.
                            sys.stderr.write(
                                "Warning: extent-free non-full annotation in "
                                + annfile + ": "
                                + ET.tostring(xann, encoding="unicode") + "\n")
                        continue
                    # map aberrant type
                    if anntask == "NPchunk":
                        anntask = "NPC"
                    try:
                        if use_tweet:
                            # Slice the string head out of the rsd text.
                            with open(twtfile) as twtin:
                                tweet = twtin.read()
                            beg = int(xextent.get("start_char"))
                            end = int(xextent.get("end_char"))
                            # but don't go negative
                            if beg < 0 or end > len(tweet):
                                sys.stderr.write(
                                    annfile +
                                    " Bad offsets: can't do %d, %d on %s\n" %
                                    (beg, end, docid))
                                continue
                            # end + 1: the slice's upper bound is exclusive
                            strhead = tweet[beg:end + 1]
                        else:
                            strhead = xextent.text
                        # int() rejects a missing or malformed start_char,
                        # which sends the annotation to the handler below.
                        tup = [
                            anntask, docid,
                            str(int(xextent.get("start_char"))),
                            xextent.get("end_char") or "None",
                            annid or "None",
                            strhead or "None",
                        ]
                    except Exception:
                        sys.stderr.write(
                            "Trouble at %s in %s should be investigated\n" %
                            (docid, annfile))
                        continue

                    if anntask in ("NE", "NPC"):
                        # Simple ne annotation / NP chunking
                        # old style: in attributes. new style: in tag
                        if "type" in xann.keys():
                            tup.append(xann.get("type"))
                        else:
                            tup.append(
                                funornone(xann.find("TAG"), lambda x: x.text))
                    else:  # Everything else has category/tag style
                        try:
                            tup.append(
                                funornone(xann.find("CATEGORY"),
                                          lambda x: x.text))
                            tup.append(
                                funornone(xann.find("TAG"), lambda x: x.text))
                            if anntask == "FE":
                                xentity = xann.find("ENTITY")
                                xphrase = xann.find("PHRASE")
                                if xentity is not None:
                                    eid = xentity.get("entity_id")
                                    tup.append(eid)
                                    # Copy the core type from the referenced
                                    # annotation.
                                    xreftag = annset[eid].find("TAG")
                                    tup.append(xreftag.text
                                               if xreftag is not None
                                               else "NONE")
                                elif xphrase is not None:
                                    tup.append(xphrase.get("phrase_id"))
                                else:
                                    sys.stderr.write(
                                        "Expected ENTITY or PHRASE at "
                                        + annfile + "; "
                                        + ET.tostring(xann, encoding="unicode")
                                        + "\n")
                                    continue
                            elif anntask == "SSA":
                                xpred = xann.find("PREDICATE")
                                if xpred is not None:
                                    tup.append(xpred.get("predicate_id"))
                            else:
                                sys.stderr.write(annfile+": Don't know how to process "\
                                                 +anntask+"\n")
                                continue
                        except Exception:
                            # Show which annotation broke before propagating.
                            print(annfile)
                            print(ET.tostring(xann))
                            raise
                    try:
                        outfile.write("\t".join(map(str, tup)) + "\n")
                    except UnicodeError:
                        # Covers both encode and decode failures on write.
                        sys.stderr.write("Warning: Unknown encoding %s:%s-%s\n" % \
                                         (tup[4], tup[2], tup[3]))