Esempio n. 1
0
    def __init__(self, logger, positive_rel_filepath, negative_rel_filepath,
                 vocab):
        """Load positive and negative evaluation relations as vocab indices.

        Each file is read through ``Relations(path, reverse=False)``, which is
        iterated as ``(parent, child)`` pairs. Both nodes of a pair are
        translated with ``vocab[node].index`` and stored in parallel lists:
        ``pos_relations_parents`` / ``pos_relations_children`` for the
        positive file and ``neg_relations_parents`` /
        ``neg_relations_children`` for the negative one.

        Args:
            logger: Stored on the instance; also used to report the sizes.
            positive_rel_filepath: Path of the positive relations file.
            negative_rel_filepath: Path of the negative relations file.
            vocab: Indexable by node; each entry must expose ``.index``.

        Raises:
            AssertionError: If a pair relates a node to itself.
        """
        self.logger = logger

        def collect(filepath, parents, children):
            # Append the vocabulary indices of every pair read from filepath.
            for parent, child in Relations(filepath, reverse=False):
                assert parent != child
                parent_idx = vocab[parent].index
                child_idx = vocab[child].index
                parents.append(parent_idx)
                children.append(child_idx)

        self.pos_relations_parents = []
        self.pos_relations_children = []
        collect(positive_rel_filepath,
                self.pos_relations_parents,
                self.pos_relations_children)

        self.neg_relations_parents = []
        self.neg_relations_children = []
        collect(negative_rel_filepath,
                self.neg_relations_parents,
                self.neg_relations_children)

        message = ('eval datasets file pos = ' + positive_rel_filepath +
                   '  neg = ' + negative_rel_filepath +
                   '; eval num rels pos = ' +
                   str(len(self.pos_relations_parents)) + '  neg = ' +
                   str(len(self.neg_relations_parents)))
        logger.info(message)
Esempio n. 2
0
    def build_model(self):
        """Instantiate the configured model class on the training relations.

        The training file path is taken from
        ``self.input()["data"]["train"].path`` and wrapped in
        ``Relations(..., reverse=False)``. The class returned by
        ``self.get_model_class()`` is then called with that data, the
        hyper-parameters stored on ``self`` and, last, the model-specific
        keyword arguments in ``self.model_parameters``.

        Returns:
            The newly constructed model instance.
        """
        relations = Relations(self.input()["data"]["train"].path,
                              reverse=False)
        model_cls = self.get_model_class()

        return model_cls(
            train_data=relations,
            dim=self.dim,
            init_range=(self.init_range_min, self.init_range_max),
            lr=self.lr,
            opt=self.opt,  # rsgd or exp_map
            burn_in=self.burn_in,
            seed=self.seed,
            # negative sampling configuration
            num_negative=self.num_negative,
            neg_sampl_strategy=self.neg_sampl_strategy,
            where_not_to_sample=self.where_not_to_sample,
            neg_edges_attach=self.neg_edges_attach,
            always_v_in_neg=self.always_v_in_neg,
            neg_sampling_power=self.neg_sampling_power,
            logger=self.logger,
            # model-specific parameters
            **self.model_parameters
        )
Esempio n. 3
0
 def __init__(self, startingPoint):
   '''
   Initialize the item with a QPainterPath constructed from startingPoint
   and with empty relation / cuspness bookkeeping.
   '''
   super(SegmentString, self).__init__()
   initialPath = QPainterPath(startingPoint)
   self.setPath(initialPath)
   # Shared strategy object (singleton), not created per instance.
   self.actions = segmentStringActions

   # The attributes below are not necessarily serialized, but are usually
   # reconstructable upon deserialize. They are only needed while the GUI
   # is displaying ControlPoints as Controls.
   self.relations = Relations()
   self.cuspness = Cuspness()
   # No ControlPoint set has been produced yet.
   self.controlPointSet = None
Esempio n. 4
0
def initialise_app(max_relations_to_load):
    """Precompute the values shared across requests to this app.

    Results are stored in ``app.registry`` (keys ``'db'``, ``'relations'``,
    ``'db_old'`` and ``'relations_old'``) so that no global variables are
    needed.

    Args:
        max_relations_to_load: Value bound to the ``LIMIT`` placeholder of
            both edge queries.
    """
    # Connect to the database and switch to the latest 'prod_' schema:
    connection = DatabaseConnection(path_config='db_config.yaml')
    latest_schema = connection.get_latest_schema('prod_')
    connection.execute('SET search_path to ' + latest_schema + ';')
    app.registry['db'] = connection

    # Each row yields two directed edges whose types have opposite signs:
    edge_query = """
      SELECT eid, eid_relation, stakeholder_type_id
      FROM related
      LIMIT %s;
      """
    edges = []
    for record in connection.query(edge_query, [max_relations_to_load]):
        kind = record['stakeholder_type_id'] or 0
        source, target = record['eid'], record['eid_relation']
        edges.extend([(source, target, +1 * kind),
                      (target, source, -1 * kind)])
    app.registry['relations'] = Relations(edges)

    # TEMP: Construct Relations using old database data. Here both
    # directions of an edge carry the same float length.
    legacy_connection = DatabaseConnection(path_config='db_config_old.yaml',
                                           search_path='mysql')
    app.registry['db_old'] = legacy_connection
    legacy_query = """SELECT eid1, eid2, length FROM related LIMIT %s;"""
    legacy_edges = []
    for record in legacy_connection.query(legacy_query,
                                          [max_relations_to_load]):
        first, second = record['eid1'], record['eid2']
        length = float(record['length'])
        legacy_edges.extend([(first, second, length),
                             (second, first, length)])
    app.registry['relations_old'] = Relations(legacy_edges)
Esempio n. 5
0
    def apply(self, action):
        """Apply a transition action to this parser state.

        Supported ``action.name`` values:

        * ``"shift"``: consume the next buffer token, push the first node of
          the action's subgraph that has no incoming relation, and copy all
          subgraph relations into ``self.stack.relations``.
        * ``"reduce"``: pop the stack top; when ``action.argv`` is not None it
          is unpacked as ``(s, label, _)`` and the relation
          ``(popped node, s, label)`` is added.
        * ``"larc"``: add relation ``(top, child, label)`` where ``child`` is
          ``self.stack.get(1)``, then call ``self.stack.pop(1)``.
        * ``"rarc"``: add relation ``(child, top, label)``; nothing is popped.

        Raises:
            ValueError: If ``action.name`` is none of the above.
            AssertionError: For ``larc``/``rarc`` when the stack top or
                ``self.stack.get(1)`` is None.
        """
        if action.name == "shift":
            token = self.buffer.consume()
            # Subgraph attached to the shift action.
            sg = action.argv.get()
            if self.stage == "COLLECT":
                # Count how often this word/POS pair is paired with the
                # subgraph returned by argv.get(None, Variables()).
                Resources.phrasetable[token.word+"_"+token.pos][action.argv.get(None, Variables())] += 1
                # Write each organization name once, followed by the
                # non-constant, non-blank concepts of its subgraph.
                if token.ne == "ORGANIZATION" and token.word not in Resources.seen_org:
                    Resources.seen_org.append(token.word)
                    Resources.forg.write(token.word)
                    for node in sg.nodes:
                        if node.isConst == False and node.concept.strip() != "":
                            Resources.forg.write(" " + node.concept)
                    Resources.forg.write("\n")

            # NOTE(review): `test` is filled but never read in this method.
            test = []
            for n in sg.nodes:
                # A node that is never the second element of a relation has no
                # incoming edge; only the first such node is pushed.
                if len([r for r in sg.relations if r[1] == n]) == 0: # push only root
                    self.stack.push(n)
                    test.append(n)
                    break

            # Mirror the subgraph relations into the stack and into a scratch
            # Relations object used for the string form below.
            tmprels = Relations()
            for n1, n2, label in sg.relations:
                    self.stack.relations.add(n1, n2, label)
                    tmprels.add(n1, n2, label)
            self.counter += 1
            # NOTE(review): `graph` is computed but not used afterwards in this
            # method; confirm whether the to_string call is still needed.
            if len(sg.nodes) == 0:
                graph = "NULL"
            elif tmprels == Relations():
                # No relations were added: show the first node's concept alone.
                graph = "(" + sg.nodes[0].concept + ")"
            else:
                graph, _, _ = tostring.to_string(tmprels.triples(), "TOP")
        elif action.name == "reduce":
            node = self.stack.pop()
            if action.argv is not None:
                # argv carries (node, label, <ignored>) for an extra relation.
                s, label, _ = action.argv
                self.stack.relations.add(node, s, label)

        elif action.name == "larc":
            label = action.argv
            child = self.stack.get(1)
            top = self.stack.top()
            assert (top is not None and child is not None)

            # Arc from the stack top to the element at position 1.
            self.stack.relations.add(top, child, label)
            self.stack.pop(1)

        elif action.name == "rarc":
            label = action.argv
            child = self.stack.get(1)
            top = self.stack.top()
            assert (top is not None and child is not None)

            # Arc from the element at position 1 to the stack top.
            self.stack.relations.add(child, top, label)

        else:
            raise ValueError("action not defined")
Esempio n. 6
0
 def __init__(self, embs, relations, tokens, dependencies, alignments, oracle, hooks, variables, stage, rules):
     """Build the initial parser state for one sentence.

     ``dependencies`` holds ``(index, label, index)`` triples; the indices
     are resolved against ``self.buffer.tokens``. When ``relations`` is not
     None a deep copy of it is wrapped in ``Relations`` and kept as
     ``self.gold``; otherwise ``self.gold`` is None.
     """
     self.semicol_gen_and = False
     self.hooks = hooks
     self.variables = variables
     self.buffer = Buffer(embs, tokens, alignments)
     self.embs = embs
     self.stage = stage

     # Replace token indices by the corresponding buffer tokens.
     token_triples = []
     for (head_idx, dep_label, tail_idx) in dependencies:
         token_triples.append((self.buffer.tokens[head_idx], dep_label, self.buffer.tokens[tail_idx]))
     self.dependencies = Dependencies(token_triples)

     self.stack = Stack(embs)
     self.oracle = oracle
     self.rules = rules
     self.gold = Relations(copy.deepcopy(relations)) if relations is not None else None
     self.sentence = " ".join(t.word for t in tokens)
     self.counter = 0
Esempio n. 7
0
def _initialise_relations(db, max_relations_to_load):
    """Return a Relations object built from the edges in database `db`.

    Rows whose two endpoints are equal are excluded by the query. Every
    returned row contributes two directed edges, one per direction, whose
    types are ``+stakeholder_type_id`` and ``-stakeholder_type_id`` (a
    missing type counts as 0).

    Args:
        db: Object whose ``query(sql, params)`` yields rows indexable by
            column name.
        max_relations_to_load: Value bound to the ``LIMIT`` placeholder.
    """
    # Retrieve the relationship rows:
    edge_query = """
      SELECT eid, eid_relation, stakeholder_type_id
      FROM related WHERE eid <> eid_relation
      LIMIT %s;
      """
    edges = []
    for record in db.query(edge_query, [max_relations_to_load]):
        kind = record['stakeholder_type_id'] or 0
        source, target = record['eid'], record['eid_relation']
        edges.extend([(source, target, +1 * kind),
                      (target, source, -1 * kind)])
    print('[OK] Received %d edges.' % (len(edges)))

    # Construct and return the Relations object from the edge list:
    return Relations(edges)
Esempio n. 8
0
def download_from_wikidata() -> None:
    """Download the Wikidata JSON document of every entity in the dataset.

    Command-line options: ``--datapath`` (passed to ``Relations``) and
    ``--outpath`` (directory receiving one ``<entity>.json`` per entity) are
    required; ``--use`` is accepted as a flag but is not read here.
    """
    parser = argparse.ArgumentParser()
    for option in ("--datapath", "--outpath"):
        parser.add_argument(option,
                            default=None,
                            type=str,
                            required=True,
                            help="")
    parser.add_argument("--use", action="store_true", help="")
    args = parser.parse_args()

    dataset = Relations(args.datapath)
    dataset.load_data(dataset.get_available_filenames())
    # Entities are collected from both the object and the subject columns.
    entities = dataset.get_all_entities(["obj_uri", "sub_uri"])

    url_template = "https://www.wikidata.org/wiki/Special:EntityData/{}.json"
    for entity in tqdm.tqdm(entities):
        source_url = url_template.format(entity)
        target_path = os.path.join(args.outpath, entity + ".json")
        download_entity(source_url, target_path)
Esempio n. 9
0
 def __init__(self, embs):
     """Start with a single ``Node(True)`` entry and an empty Relations."""
     initial_node = Node(True)
     self.embs = embs
     # The node list initially holds only the node created above.
     self.nodes = [initial_node]
     self.relations = Relations()
Esempio n. 10
0
def main():
    """Write per-language JSONL versions of every relation file.

    For each language in the mapping loaded from ``--languagemapping``, the
    relation files under ``--data`` are loaded and every relation that has
    subject/object URIs and labels is written to
    ``<outpath>/<lang>/<filename>.jsonl``. When ``get_entity_surface``
    returns a surface form for both entities those are used as labels;
    otherwise the labels already present in the relation are kept and the
    record is flagged with ``"from_english": True``.

    After each relation file the running totals (cumulative over the files
    of the current language) are logged and appended to
    ``<outpath>/<lang>.log``.
    """
    parser = argparse.ArgumentParser()
    for option in ("--data", "--entities", "--outpath", "--languagemapping"):
        parser.add_argument(option,
                            default=None,
                            type=str,
                            required=True,
                            help="")
    args = parser.parse_args()
    lang2translateid = load_languagemapping(args.languagemapping)

    required_keys = ("sub_uri", "obj_uri", "sub_label", "obj_label")

    for lang in lang2translateid:
        # NOTE(review): the dataset is reloaded for every language although
        # it does not appear to depend on `lang`; confirm before hoisting.
        t = Relations(args.data)
        filenames = t.get_available_filenames()
        t.load_data(filenames)
        count = collections.Counter()

        # Create the output tree before opening the log file, so that a
        # not-yet-existing --outpath no longer makes open() fail.
        outdirectory = os.path.join(args.outpath, lang)
        os.makedirs(outdirectory, exist_ok=True)

        # The context manager guarantees the log is closed even when
        # processing a relation file raises.
        with open(os.path.join(args.outpath, lang + ".log"), "w") as logfile:
            for filename, relations in t.data.items():
                LOG.info("Processing relation: {}".format(filename))
                with open(os.path.join(outdirectory, filename + ".jsonl"),
                          "w") as fout:
                    for relation in relations:
                        count["in_file"] += 1
                        if not all(key in relation for key in required_keys):
                            continue
                        count["available"] += 1
                        obj_uri = relation["obj_uri"]
                        sub_uri = relation["sub_uri"]
                        # load entity information
                        obj_surface = get_entity_surface(
                            args.entities, obj_uri, lang)
                        sub_surface = get_entity_surface(
                            args.entities, sub_uri, lang)
                        if obj_surface and sub_surface:
                            count["converted"] += 1
                            obj_label, sub_label = obj_surface, sub_surface
                            from_english = False
                        else:
                            # use english surface forms
                            obj_label = relation["obj_label"]
                            sub_label = relation["sub_label"]
                            from_english = True
                        # Key order is kept stable so the JSON lines stay
                        # identical to earlier outputs.
                        to_write = {
                            "sub_uri": sub_uri,
                            "obj_uri": obj_uri,
                            "obj_label": obj_label,
                            "sub_label": sub_label,
                            "from_english": from_english,
                        }
                        fout.write(json.dumps(to_write) + "\n")
                summary = "{}|{}|{}|(converted/available/in_file)".format(
                    count["converted"], count["available"], count["in_file"])
                LOG.info(summary)
                logfile.write("{}|{}\n".format(filename, summary))
Esempio n. 11
0
class SegmentString(QGraphicsPathItem):
  '''
  GraphicsItem that is a sequence of Segments.
  
  Segments are line-like curves.

  Segments don't have their own transform,
  so they are moved by changing their control points.
  
  Responsibilities:
  1. know endPoint, startPoint, countSegments
  2. maintain structure (add segment, update segment, delete(FIXME))
  3. get ControlPointSet (so user can manipulate them.)
  4. maintain relations between ControlPoints in ControlPointSet
  5. move control points
  6. maintain cusps and return cuspness of a segment
  
  Specific to Qt GUI toolkit.
  
  Lifetime
  ========
  
  !!! Note that appendSegments() doesn't store references to Segments passed as parameters.
  This stores segments in an internal format (currently QPainterPath), not as Segment instances.
  getControlPointSet() returns ControlPoint instances which refer to Segment instances,
  and all those persist as long as you keep the ControlPointSet.
  
  Internal format using QPainterPath
  ==================================
  A QPainterPath is a sequence of QPathElements having a type.
  For a cubic curve, there are three consecutive QPathElements of type CubicTo.
  QPainterPath is not updateable, only appendable.
  
  Here, the first QPathElement is type MoveTo, followed by 3-tuples of type CubicTo.
  
  ControlPoint Roles and Types
  ============================
  
  ControlPoints play roles.
  The role of a ControlPoint is not explicitly modeled, 
  only modeled by relations between ControlPoints and other conditions.
  
  The relations of ControlPoints to each other are:
  - TiedTo: coincident with a Anchor CP of another segment
  - OppositeTo: is a Anchor CP paired with Anchor CP at opposite end of segment
  - ArmTo: is a CP of an arm between a Direction CP and an Anchor CP
  
  We do it this way for flexibility of design:
  the relations form a network or graph that helps define the behavior when user drags ControlPoints.
  A drag behavior is defined by a traversal method (specialization of walk()) of the relations network.
  
  Cusps
  =====
  
  Cusp-ness is a property between two segments.
  EG two curves form a cusp if their Anchor-Direction arms are NOT colinear.
  It is dynamic, changing as a user moves ControlPoints and thus Segments.
  When segments are added, their cuspness can be declared (but it is not checked.)
  When segments change, cuspness is checked.
  Cuspness is not stored in most serialized formats like SVG.
  Cuspness supports user friendly GUI: cusp points move differently.
  '''
  
  # Number of path elements that make up one segment
  # (a cubic curve is stored as three consecutive elements).
  ELEMENTS_PER_SEGMENT = 3
  
  def __init__(self, startingPoint):
    # Initial path is constructed from startingPoint alone; segments are appended later.
    super(SegmentString, self).__init__()
    self.setPath(QPainterPath(startingPoint))
    self.actions = segmentStringActions # singleton
    
    # The following are not necessarily serialized, but usually reconstructable upon deserialize.
    # They are only needed when GUI is displaying ControlPoints as Controls
    self.relations = Relations()
    self.cuspness = Cuspness()
    # Stays None until getControlPointSet() has been called.
    self.controlPointSet = None
  
  
  # Inherits path()

  '''
  Responsibility: 1. know end points.
  '''

  def getEndPoint(self):
    ''' 
    End point of a SegmentString is:
    - coordinates of its last element
    - OR startingPoint if has no Segments
    '''
    # With no segments the last element is element 0, so the start point is returned.
    return self._pointForPathElement(element = self.path().elementAt(self.path().elementCount() - 1))
  
  def getStartPoint(self):
    ''' 
    Start point of a SegmentString is:
    - first element, regardless if has any Segments
    '''
    return self._pointForPathElement(element = self.path().elementAt(0))
  
  
  def _pointForPathElement(self, element):
    '''
    Return  QPointF for QPathElements.
    QPathElements don't have a x() method
    Symptoms are "Exception: reverse not implemented"
    '''
    # x and y are read as attributes, not called as methods.
    return QPointF(element.x, element.y)
  
  
  
  '''
  Responsibililty: 2. maintain structure.
  '''
  
  def appendSegments(self, segments, segmentCuspness):
    ''' 
    Append segments sequentially to end of self. 
    
    cuspness is [Bool,] equal in length to segments and tells whether each segment is a cusp.
    
    !!! The QPainterPath instance returned by QGraphicsPathItem.path() is a copy
    and when appended to does not change the display.
    IOW QGraphicsPathItem keeps a copy when you call setPath()
    
    FUTURE might be faster to union existing path with new path.
    '''
    # print segments
    
    # copy current path
    pathCopy = self.path()
    # Position of the current segment within `segments`; indexes segmentCuspness.
    segmentOrdinal = 0
    for segment in segments:
      # Taken before appending, so it is the path index of the segment's first element.
      indexOfSegmentInParent=pathCopy.elementCount()
      self._appendSegmentToPath(segment, pathCopy)
      if segmentCuspness[segmentOrdinal]:
        self.cuspness.setCuspness(indexOfSegmentInParent)
      segmentOrdinal += 1
      
    # !!! pathCopy is NOT an alias for self.path() now, they differ.  Hence:
    self.setPath(pathCopy)
    # No need to invalidate or update display, at least for Qt
    
    # TEST try to alter the path: has no effect, QPathElements are constants??
    #pathCopy.elementAt(1).x += 20
    #self.setPath(pathCopy)


  def _appendSegmentToPath(self, segment, path):
    ''' 
    Append internal representation of given Segment instance to given path. 
    
    !!! All segments represented by QPathElement of ElementType:cubic i.e. curve
    !!! Cubic only wants the final three ControlPoints.
    '''
    # The first point of the segment is skipped; the remaining points are passed to cubicTo.
    path.cubicTo(*segment.asPoints()[1:])
    
  
  def segmentChanged(self, segment, indexOfSegmentInParent):
    ''' Given segment has changed. Propagate change to self. '''
    self.updateSegment(segment, indexOfSegmentInParent)
  
  
  def updateSegment(self, segment, indexOfSegmentInParent):
    '''
    Update drawable with changed segment.
    
    Understands that internal format self.path() is not updateable.
    Thus it is copy into new, with one changed segment in the middle.
    IE copies prefix, appends changed Segment, copies suffix.
    '''
    # startingPoint same as existing path
    # FIXME: what if user changes the starting controlPoint???
    # NOTE(review): elementAt(0) is handed to QPainterPath directly here, while other
    # methods convert elements with _pointForPathElement() first - confirm this is accepted.
    newPath = QPainterPath(self.path().elementAt(0))
    for segmentIndex in self._segmentIndexIter():
      if segmentIndex == indexOfSegmentInParent:
        # The changed segment replaces the stored one at this index.
        self._appendSegmentToPath(segment, newPath)
      else:
        self._copySegmentPathToPath(sourcePath=self.path(), destinationPath=newPath, segmentIndex=segmentIndex)
    # Assert SegmentString.getEndPoint is correct even case last segment updated
    self.setPath(newPath)
        
      
  def _segmentIndexIter(self):
    ''' 
    Generate indexes of segments.
    An index is NOT the ordinal.
    An index is the ordinal of the QPathElement of the first QPathElement for segment.
    Starts at 1, since here zeroeth QPathElement is a MoveTo.
    EG 1, 4, 7, 10, ...
    
    !!! Relies on all segments represented as 3-tuple curves.
    '''
    for i in range(0, self.segmentCount()):
      yield i * SegmentString.ELEMENTS_PER_SEGMENT + 1
  
  
  def segmentCount(self):
    # Count of segments, computed as the path's element count divided by ELEMENTS_PER_SEGMENT.
    # The element count includes the leading MoveTo, so this relies on `/` truncating.
    # NOTE(review): under true division (Python 3 or `from __future__ import division`)
    # this returns a float and range() in _segmentIndexIter() would fail; `//` would be explicit.
    return self.path().elementCount()/SegmentString.ELEMENTS_PER_SEGMENT
  
  def _copySegmentPathToPath(self, sourcePath, destinationPath, segmentIndex):
    ''' Use elements of a segment from sourcePath to append a segment to destinationPath. '''
    destinationPath.cubicTo(*self._pointsInPathForSegment(sourcePath,segmentIndex))
  
  
  def _pointsInPathForSegment(self, path, segmentIndex):
    ''' 
    Return list of QPointF for QPathElements of segment.
    !!! This is a 3-tuple, not sufficient for creating Segment
    '''
    result = []
    # Elements segmentIndex .. segmentIndex + ELEMENTS_PER_SEGMENT - 1 belong to this segment.
    for i in range(0, SegmentString.ELEMENTS_PER_SEGMENT):
      result.append(self._pointForPathElement(element = path.elementAt(segmentIndex + i)))
    return result
    
    
    
  '''
  Responsibility: 
  3. Get getControlPointSet so user can manipulate them
  4. maintain relations between ControlPoints in ControlPointSet
  '''
  def getControlPointSet(self):
    '''
    Instantiate for self:
    - ControlPoints
    - Segments
    - Relations (among ControlPoints)
    Returns list of ControlPoint.
    '''
    # NOT assert self.controlPointSet is None
    # Relations are rebuilt from scratch on every call.
    self.relations.clear()
    result = []
    # End ControlPoint of the previous segment; None for the first segment.
    previousEndControlPoint = None
    for segmentIndex in self._segmentIndexIter():
      segment = self._createSegmentAt(segmentIndex)
      for controlPoint in segment.controlPointIter():
        result.append(controlPoint)
      segment.createRelations(relations=self.relations, previousEndAnchor=previousEndControlPoint)
      previousEndControlPoint = segment.getEndControlPoint()
    self.controlPointSet = result # Remember my own ControlPoint set
    # FIXME: above does NOT allow for many views of same SegmentString
    return result
  
  
  def _createSegmentAt(self, segmentIndex):
    ''' Create Segment instance for what is described in path at segmentIndex. 
    
    !!! Expand the run-encoding of QPainterPath
    (last point of previous segment shared with first point of next segment.)
    E.G. CurveSegment requires four points from three in the path.
    '''
    # print "SegmentIndex", segmentIndex
    if segmentIndex == 1:
      # Only one prior element, a MoveTo
      startPoint = self.getStartPoint()
    else:
      # Last point of previous segment is first point of this segment
      startPoint = self._pointsInPathForSegment(self.path(), segmentIndex - SegmentString.ELEMENTS_PER_SEGMENT)[-1]
    pointsFromPath = self._pointsInPathForSegment(self.path(), segmentIndex)
    # Four points in total: the shared start point plus the three stored in the path.
    segment = CurveSegment(startPoint, *pointsFromPath)
    # assert ControlPoints were created and refer to segment
    segment.setIndexInParent(parent=self, indexOfSegmentInParent = segmentIndex)
    return segment
  
  
  def clearTraversal(self):
    ''' Clear traversal flags to prepare for new traversal. '''
    # NOTE(review): self.controlPointSet is None until getControlPointSet() has run;
    # iterating it before then would raise TypeError - confirm callers guarantee the order.
    for controlPoint in self.controlPointSet:
      controlPoint.setTraversed(False)
  
  
  '''
  Responsibility:  5. move control points
  '''
  
  def moveRelated(self, controlPoint, deltaCoordinate, alternateMode):
    ''' Move (translate) controlPoint and set of related controlPoints. '''
    self.clearTraversal() # movement by traversal of relations
    # delegate to strategy/policy
    self.actions.moveRelated(self.relations, controlPoint, deltaCoordinate, alternateMode)
  
  
  '''
  6. maintain cusps and return cuspness of a segment
  '''
  def isSegmentCusp(self, segmentIndex):
    # Delegates to the Cuspness helper; returns whatever isCusp() reports for segmentIndex.
    return self.cuspness.isCusp(segmentIndex)
    
  def setSegmentCuspness(self, segmentIndex):
    # Delegates to the Cuspness helper to record cuspness for segmentIndex.
    self.cuspness.setCuspness(segmentIndex)


  '''
  TESTING: Reimplement paint() to help see segments.  Not necessary for production use.
  '''
  def paint(self, painter, styleOption, widget):
    ''' Reimplemented to paint elements in alternating colors '''
    path = self.path()  # alias
    # End point of what has been drawn so far; set by the MoveTo element.
    pathEnd = None
    # Index of the path element currently being examined.
    i = 0
    while True:
      try:
        element = path.elementAt(i)
        # print type(element), element.type
        if element.isMoveTo():
          pathEnd = QPointF(element.x, element.y)
          i+=1
        elif element.isCurveTo():
          # Gather curve data, since is spread across elements of type curveElementData
          cp1 = QPointF(element.x, element.y)
          element = path.elementAt(i+1)
          cp2 = QPointF(element.x, element.y)
          element = path.elementAt(i+2)
          newEnd = QPointF(element.x, element.y)
          # create a subpath, since painter has no drawCubic method
          subpath=QPainterPath()
          subpath.moveTo(pathEnd)
          subpath.cubicTo(cp1, cp2, newEnd)
          painter.drawPath(subpath)
          
          pathEnd = newEnd
          # A curve occupies three elements; skip past all of them.
          i+=3
        else:
          print "unhandled path element", element.type
          i+=1
          """
          TODO: if SegmentStringss contain lines (w/o Direction ControlPoints)
          !!! We don't use QPathElements of type Line
          elif element.isLineTo():
            newEnd = QPointF(element.x, element.y)
            painter.drawLine(pathEnd, newEnd)
            pathEnd = newEnd
            i+=1
          """
        if i >= path.elementCount():
          break
      except Exception as inst:
        # Any failure is printed and ends painting instead of propagating.
        print inst
        break
        
      # Alternate colors
      # i advances by 3 per curve, so its parity flips from one segment to the next.
      if i%2 == 1:
        painter.setPen(Qt.blue)
      else:
        painter.setPen(Qt.red)
Esempio n. 12
0
 def __init__(self, relations):
     """Keep a deep copy of ``relations``, wrapped in Relations, as ``self.gold``."""
     # Copy first so later changes to self.gold cannot affect the caller's data.
     private_copy = copy.deepcopy(relations)
     self.gold = Relations(private_copy)
Esempio n. 13
0
class Oracle:
    """Derives parser actions from a consumable copy of the gold relations.

    ``self.gold`` is a ``Relations`` built from a deep copy of the relations
    given to the constructor. Whenever an action accounting for a gold edge
    is returned, that edge is removed from ``self.gold.parents`` and
    ``self.gold.children``.
    """

    def __init__(self, relations):
        """Store a deep copy of ``relations`` wrapped in ``Relations``."""
        self.gold = Relations(copy.deepcopy(relations))

    def reentrancy(self, node, found):
        """Look for a gold edge from ``node`` to one of its siblings.

        Siblings are the other children, in ``found``, of the parents of
        ``node``. For the first sibling ``s`` for which
        ``self.gold.isRel(node, s)`` gives a label, the edge is removed from
        the gold relations.

        Returns:
            ``[sibling, label, siblings]`` for that sibling, or None when no
            sibling is related to ``node`` in the gold relations.
        """
        siblings = []
        for parent in found.parents[node]:
            for item in found.children[parent[0]]:
                if item[0] != node:
                    siblings.append(item[0])

        for sibling in siblings:
            label = self.gold.isRel(node, sibling)
            if label is None:
                continue
            self.gold.parents[sibling].remove((node, label))
            self.gold.children[node].remove((sibling, label))
            # The result of these lookups is not used; they are retained so
            # that the accesses on `found` stay exactly as before.
            parents = [i[0] for i in found.parents[node]]
            parents = [i[0] for i in found.parents[sibling] if i[0] in parents]
            return [sibling, label, siblings]
        return None

    def valid_actions(self, state):
        """Return the next action for ``state``, or None if there is none.

        Checked in order: a gold edge from the stack top to the element at
        position 1 ("larc"), the reverse edge ("rarc"), a "reduce" when the
        stack is non-empty and the top has no gold edge to or from any node
        of the buffer tokens, and finally a "shift" of the subgraph formed by
        the nodes of the next buffer token.
        """
        top = state.stack.top()
        other = state.stack.get(1)

        label = self.gold.isRel(top, other)
        if label is not None:
            self.gold.children[top].remove((other, label))
            self.gold.parents[other].remove((top, label))
            return Action("larc", label)

        label = self.gold.isRel(other, top)
        if label is not None:
            self.gold.parents[top].remove((other, label))
            self.gold.children[other].remove((top, label))
            return Action("rarc", label)

        if state.stack.isEmpty() == False:
            # Every buffer node is inspected; the scan does not stop early.
            linked_to_buffer = False
            for token in state.buffer.tokens:
                for candidate in token.nodes:
                    outgoing = self.gold.isRel(top, candidate) is not None
                    if outgoing or self.gold.isRel(candidate, top) is not None:
                        linked_to_buffer = True
            if not linked_to_buffer:
                return Action("reduce",
                              self.reentrancy(top, state.stack.relations))

        if state.buffer.isEmpty() == False:
            nodes = state.buffer.peek().nodes
            relations = []

            def consume(parent, target):
                # Move every gold edge parent -> target into `relations`.
                # Iterates over a deep copy because the live list is edited.
                snapshot = copy.deepcopy(self.gold.children[parent])
                for (child, label) in snapshot:
                    if child == target:
                        relations.append((parent, target, label))
                        self.gold.children[parent].remove((child, label))
                        self.gold.parents[child].remove((parent, label))

            for n1 in nodes:
                for n2 in nodes:
                    if n1 != n2:
                        consume(n1, n2)
                        consume(n2, n1)

            return Action("shift", Subgraph(nodes, relations))

        return None