def __init__(self, logger, positive_rel_filepath, negative_rel_filepath, vocab):
    """Load positive and negative evaluation relations as vocabulary indices.

    Each file is read through ``Relations(path, reverse=False)``, which is
    iterated as ``(parent, child)`` pairs.  Both members of every pair are
    looked up in ``vocab`` and their ``.index`` values are stored in two
    parallel lists (parents / children), once for the positive file and
    once for the negative file.

    Args:
        logger: Object exposing ``info``; also kept as ``self.logger``.
        positive_rel_filepath: Path of the positive relations file.
        negative_rel_filepath: Path of the negative relations file.
        vocab: Mapping from a node to an entry that has an ``index``
            attribute.

    Raises:
        AssertionError: If a pair relates a node to itself.
    """

    def collect_indices(rel_filepath, parent_indices, child_indices):
        # Appends in place so the target lists already exist on ``self``
        # while the file is being read.
        for parent, child in Relations(rel_filepath, reverse=False):
            assert parent != child
            parent_idx = vocab[parent].index
            child_idx = vocab[child].index
            parent_indices.append(parent_idx)
            child_indices.append(child_idx)

    self.logger = logger

    self.pos_relations_parents = []
    self.pos_relations_children = []
    collect_indices(positive_rel_filepath,
                    self.pos_relations_parents,
                    self.pos_relations_children)

    self.neg_relations_parents = []
    self.neg_relations_children = []
    collect_indices(negative_rel_filepath,
                    self.neg_relations_parents,
                    self.neg_relations_children)

    num_pos = str(len(self.pos_relations_parents))
    num_neg = str(len(self.neg_relations_parents))
    logger.info('eval datasets file pos = ' + positive_rel_filepath +
                ' neg = ' + negative_rel_filepath +
                '; eval num rels pos = ' + num_pos +
                ' neg = ' + num_neg)
def build_model(self):
    """Instantiate the configured model class on the training relations.

    The training file path is taken from ``self.input()["data"]["train"]``
    and wrapped in ``Relations(..., reverse=False)``.  The class returned by
    ``self.get_model_class()`` is then called with the hyper-parameters
    stored on this object plus whatever ``self.model_parameters`` holds.

    Returns:
        The newly constructed model instance.
    """
    train_target = self.input()["data"]["train"]
    relations = Relations(train_target.path, reverse=False)

    model_cls = self.get_model_class()
    sampling_range = (self.init_range_min, self.init_range_max)

    return model_cls(
        train_data=relations,
        dim=self.dim,
        init_range=sampling_range,
        lr=self.lr,
        opt=self.opt,  # rsgd or exp_map
        burn_in=self.burn_in,
        seed=self.seed,
        num_negative=self.num_negative,
        neg_sampl_strategy=self.neg_sampl_strategy,
        where_not_to_sample=self.where_not_to_sample,
        neg_edges_attach=self.neg_edges_attach,
        always_v_in_neg=self.always_v_in_neg,
        neg_sampling_power=self.neg_sampling_power,
        logger=self.logger,
        # model-specific parameters
        **self.model_parameters
    )
def __init__(self, startingPoint):
    '''
    Create a SegmentString whose path begins at startingPoint.

    The path initially holds no segments.
    '''
    super(SegmentString, self).__init__()
    initialPath = QPainterPath(startingPoint)
    self.setPath(initialPath)

    # No ControlPoints exist until they are requested.
    self.controlPointSet = None

    # Not necessarily serialized, but usually reconstructable upon deserialize.
    # Only needed when the GUI is displaying ControlPoints as Controls.
    self.relations = Relations()
    self.cuspness = Cuspness()

    self.actions = segmentStringActions  # singleton
def initialise_app(max_relations_to_load):
    """Precompute the values shared by all requests to this app.

    Everything is stored in ``app.registry`` so that no global variables
    are needed.  The registry receives:

    * ``'db'`` / ``'relations'`` -- connection to the current database and
      the ``Relations`` built from its ``related`` table;
    * ``'db_old'`` / ``'relations_old'`` -- the same pair for the old
      database (temporary).

    Args:
        max_relations_to_load: Value bound to the ``LIMIT`` placeholder of
            both edge queries.
    """
    # Current database: connect and switch to the latest 'prod_' schema.
    db = DatabaseConnection(path_config='db_config.yaml')
    latest_schema = db.get_latest_schema('prod_')
    db.execute('SET search_path to ' + latest_schema + ';')
    app.registry['db'] = db

    # Each row yields two directed edges: the stored direction carries the
    # stakeholder type, the reverse direction carries its negation.
    q = """ SELECT eid, eid_relation, stakeholder_type_id FROM related LIMIT %s; """
    edge_list = []
    for row in db.query(q, [max_relations_to_load]):
        edge_type = row['stakeholder_type_id'] or 0
        forward = (row['eid'], row['eid_relation'], +1 * edge_type)
        backward = (row['eid_relation'], row['eid'], -1 * edge_type)
        edge_list.extend([forward, backward])
    app.registry['relations'] = Relations(edge_list)

    # TEMP: Construct Relations using old database data.
    db_old = DatabaseConnection(path_config='db_config_old.yaml',
                                search_path='mysql')
    app.registry['db_old'] = db_old

    q_old = """SELECT eid1, eid2, length FROM related LIMIT %s;"""
    edge_list_old = []
    for row in db_old.query(q_old, [max_relations_to_load]):
        # Old edges are symmetric: both directions share the same length.
        edge_list_old.append((row['eid1'], row['eid2'], float(row['length'])))
        edge_list_old.append((row['eid2'], row['eid1'], float(row['length'])))
    app.registry['relations_old'] = Relations(edge_list_old)
def apply(self, action):
    """Execute one transition on this state.

    Supported ``action.name`` values:

    * ``"shift"``  -- consume the next buffer token, push the first node of
      its subgraph that has no incoming relation, and copy the subgraph's
      relations into the stack relations.
    * ``"reduce"`` -- pop the stack; when ``action.argv`` is given, add a
      relation from the popped node to the node it names.
    * ``"larc"``   -- add relation top -> second element, then ``pop(1)``.
    * ``"rarc"``   -- add relation second element -> top.

    Raises:
        ValueError: For any other action name.
        AssertionError: If an arc action finds fewer than two stack items.
    """
    if action.name == "shift":
        token = self.buffer.consume()
        subgraph = action.argv.get()

        if self.stage == "COLLECT":
            phrase_key = token.word + "_" + token.pos
            Resources.phrasetable[phrase_key][action.argv.get(None, Variables())] += 1
            if token.ne == "ORGANIZATION" and token.word not in Resources.seen_org:
                Resources.seen_org.append(token.word)
                Resources.forg.write(token.word)
                for node in subgraph.nodes:
                    if node.isConst == False and node.concept.strip() != "":
                        Resources.forg.write(" " + node.concept)
                Resources.forg.write("\n")

        for candidate in subgraph.nodes:
            has_incoming = any(rel[1] == candidate for rel in subgraph.relations)
            if not has_incoming:
                # push only root
                self.stack.push(candidate)
                break

        shifted_rels = Relations()
        for source, target, label in subgraph.relations:
            self.stack.relations.add(source, target, label)
            shifted_rels.add(source, target, label)
        self.counter += 1

        # NOTE(review): ``graph`` is not read again in this method; the
        # computation is kept exactly as before.
        if len(subgraph.nodes) == 0:
            graph = "NULL"
        elif shifted_rels == Relations():
            graph = "(" + subgraph.nodes[0].concept + ")"
        else:
            graph, _, _ = tostring.to_string(shifted_rels.triples(), "TOP")

    elif action.name == "reduce":
        popped = self.stack.pop()
        if action.argv is not None:
            sibling, label, _ = action.argv
            self.stack.relations.add(popped, sibling, label)

    elif action.name in ("larc", "rarc"):
        label = action.argv
        child = self.stack.get(1)
        top = self.stack.top()
        assert (top is not None and child is not None)
        if action.name == "larc":
            self.stack.relations.add(top, child, label)
            self.stack.pop(1)
        else:
            self.stack.relations.add(child, top, label)

    else:
        raise ValueError("action not defined")
def __init__(self, embs, relations, tokens, dependencies, alignments, oracle, hooks, variables, stage, rules):
    """Set up a parser state for one sentence.

    Args:
        embs: Passed to both ``Buffer`` and ``Stack``; kept as ``self.embs``.
        relations: Gold relations, or ``None``.  When given, a deep copy is
            wrapped in ``Relations`` and stored as ``self.gold``.
        tokens: Sentence tokens; each must expose ``word``.
        dependencies: Iterable of ``(i1, label, i2)`` where the indices
            refer to positions in the buffer's token list.
        alignments: Passed to ``Buffer``.
        oracle, hooks, variables, stage, rules: Stored unchanged.
    """
    # Plain configuration.
    self.semicol_gen_and = False
    self.hooks = hooks
    self.variables = variables
    self.embs = embs
    self.stage = stage
    self.oracle = oracle
    self.rules = rules
    self.counter = 0

    # Buffer first: dependency triples are expressed over its tokens.
    self.buffer = Buffer(embs, tokens, alignments)
    buffered = self.buffer.tokens
    self.dependencies = Dependencies(
        [(buffered[head], label, buffered[dep]) for (head, label, dep) in dependencies]
    )
    self.stack = Stack(embs)

    self.gold = None if relations is None else Relations(copy.deepcopy(relations))
    self.sentence = " ".join(tok.word for tok in tokens)
def _initialise_relations(db, max_relations_to_load):
    """Return a Relations object built from the edges in database `db`.

    Rows of ``related`` whose two endpoints differ are fetched (at most
    `max_relations_to_load`).  Every row becomes two directed edges: the
    stored direction weighted by the stakeholder type (0 when missing) and
    the reverse direction weighted by its negation.
    """
    q = """ SELECT eid, eid_relation, stakeholder_type_id FROM related WHERE eid <> eid_relation LIMIT %s; """

    edge_list = []
    for row in db.query(q, [max_relations_to_load]):
        edge_type = row['stakeholder_type_id'] or 0
        forward = (row['eid'], row['eid_relation'], +1 * edge_type)
        backward = (row['eid_relation'], row['eid'], -1 * edge_type)
        edge_list += [forward, backward]

    print('[OK] Received %d edges.' % (len(edge_list)))
    return Relations(edge_list)
def download_from_wikidata() -> None:
    """Download the Wikidata JSON of every entity used in the relation data.

    Command-line options: ``--datapath`` (relation data location, required),
    ``--outpath`` (output directory, required) and the flag ``--use``.
    One ``<entity>.json`` file is requested per entity returned by
    ``Relations.get_all_entities(["obj_uri", "sub_uri"])``.
    """
    parser = argparse.ArgumentParser()
    for option in ("--datapath", "--outpath"):
        parser.add_argument(option, default=None, type=str, required=True, help="")
    parser.add_argument("--use", action="store_true", help="")
    args = parser.parse_args()

    relation_data = Relations(args.datapath)
    relation_data.load_data(relation_data.get_available_filenames())
    entities = relation_data.get_all_entities(["obj_uri", "sub_uri"])

    base_url = "https://www.wikidata.org/wiki/Special:EntityData/{}.json"
    for entity in tqdm.tqdm(entities):
        source_url = base_url.format(entity)
        target_file = os.path.join(args.outpath, entity + ".json")
        download_entity(source_url, target_file)
def __init__(self, embs):
    """Create a container holding a single ``Node(True)`` and no relations.

    Args:
        embs: Stored unchanged as ``self.embs``.
    """
    initial_node = Node(True)
    self.embs = embs
    self.relations = Relations()
    self.nodes = [initial_node]
def main():
    """Write per-language versions of every relation file.

    Command-line options (all required): ``--data``, ``--entities``,
    ``--outpath``, ``--languagemapping``.

    For each language in the mapping, every relation file is rewritten to
    ``<outpath>/<lang>/<filename>.jsonl``.  A triple is written only when it
    has both URIs and both labels; its labels are replaced by the
    language-specific surface forms when both exist, otherwise the original
    labels are kept and ``from_english`` is set.  A summary line per
    relation file goes to ``<outpath>/<lang>.log``; the counters are
    cumulative over the files of one language.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", default=None, type=str, required=True, help="")
    parser.add_argument("--entities", default=None, type=str, required=True, help="")
    parser.add_argument("--outpath", default=None, type=str, required=True, help="")
    parser.add_argument("--languagemapping", default=None, type=str, required=True, help="")
    args = parser.parse_args()

    lang2translateid = load_languagemapping(args.languagemapping)
    required_keys = ("sub_uri", "obj_uri", "sub_label", "obj_label")

    for lang in lang2translateid:
        t = Relations(args.data)
        filenames = t.get_available_filenames()
        t.load_data(filenames)
        count = collections.Counter()

        # The context manager closes the log even when a lookup or write
        # below raises; previously the handle leaked in that case.
        with open(os.path.join(args.outpath, lang + ".log"), "w") as logfile:
            for filename, relations in t.data.items():
                LOG.info("Processing relation: {}".format(filename))
                outdirectory = os.path.join(args.outpath, lang)
                os.makedirs(outdirectory, exist_ok=True)

                with open(os.path.join(outdirectory, filename + ".jsonl"), "w") as fout:
                    for relation in relations:
                        count["in_file"] += 1
                        if not all(key in relation for key in required_keys):
                            continue
                        count["available"] += 1
                        obj_uri = relation["obj_uri"]
                        sub_uri = relation["sub_uri"]

                        # load entity information
                        obj_surface = get_entity_surface(args.entities, obj_uri, lang)
                        sub_surface = get_entity_surface(args.entities, sub_uri, lang)

                        if obj_surface and sub_surface:
                            count["converted"] += 1
                            to_write = {
                                "sub_uri": sub_uri,
                                "obj_uri": obj_uri,
                                "obj_label": obj_surface,
                                "sub_label": sub_surface,
                                "from_english": False,
                            }
                        else:
                            # use english surface forms
                            to_write = {
                                "sub_uri": sub_uri,
                                "obj_uri": obj_uri,
                                "obj_label": relation["obj_label"],
                                "sub_label": relation["sub_label"],
                                "from_english": True,
                            }
                        fout.write(json.dumps(to_write) + "\n")

                summary = "{}|{}|{}|(converted/available/in_file)".format(
                    count["converted"], count["available"], count["in_file"])
                LOG.info(summary)
                logfile.write("{}|{}\n".format(filename, summary))
class SegmentString(QGraphicsPathItem):
    '''
    GraphicsItem that is a sequence of Segments.

    Segments are line-like curves.
    Segments don't have their own transform,
    so they are moved by changing their control points.

    Responsibilities:
    1. know endPoint, startPoint, countSegments
    2. maintain structure (add segment, update segment, delete(FIXME))
    3. get ControlPointSet (so user can manipulate them.)
    4. maintain relations between ControlPoints in ControlPointSet
    5. move control points
    6. maintain cusps and return cuspness of a segment

    Specific to Qt GUI toolkit.

    Lifetime
    ========

    !!! Note that appendSegments() doesn't store references to Segments passed as parameters.
    This stores segments in an internal format (currently QPainterPath), not as Segment instances.
    getControlPointSet() returns ControlPoint instances which refer to Segment instances,
    and all those persist as long as you keep the ControlPointSet.

    Internal format using QPainterPath
    ==================================

    A QPainterPath is a sequence of QPathElements having a type.
    For a cubic curve, there are three consecutive QPathElements of type CubicTo.
    QPainterPath is not updateable, only appendable.

    Here, the first QPathElement is type MoveTo, followed by 3-tuples of type CubicTo.

    ControlPoint Roles and Types
    ============================

    ControlPoints play roles.
    The role of a ControlPoint is not explicitly modeled,
    only modeled by relations between ControlPoints and other conditions.

    The relations of ControlPoints to each other are:
    - TiedTo: coincident with an Anchor CP of another segment
    - OppositeTo: is an Anchor CP paired with Anchor CP at opposite end of segment
    - ArmTo: is a CP of an arm between a Direction CP and an Anchor CP

    We do it this way for flexibility of design:
    the relations form a network or graph that helps define the behavior when user drags ControlPoints.
    A drag behavior is defined by a traversal method (specialization of walk()) of the relations network.

    Cusps
    =====

    Cusp-ness is a property between two segments.
    EG two curves form a cusp if their Anchor-Direction arms are NOT colinear.
    It is dynamic, changing as a user moves ControlPoints and thus Segments.
    When segments are added, their cuspness can be declared (but it is not checked.)
    When segments change, cuspness is checked.
    Cuspness is not stored in most serialized formats like SVG.
    Cuspness supports user friendly GUI: cusp points move differently.
    '''

    ELEMENTS_PER_SEGMENT = 3

    def __init__(self, startingPoint):
        '''
        Create an empty SegmentString (no segments) starting at startingPoint.
        '''
        super(SegmentString, self).__init__()
        self.setPath(QPainterPath(startingPoint))
        self.actions = segmentStringActions  # singleton

        # The following are not necessarily serialized, but usually reconstructable upon deserialize.
        # They are only needed when GUI is displaying ControlPoints as Controls
        self.relations = Relations()
        self.cuspness = Cuspness()
        self.controlPointSet = None

    # Inherits path()

    #
    # Responsibility: 1. know end points.
    #

    def getEndPoint(self):
        '''
        End point of a SegmentString is:
        - coordinates of its last element
        - OR startingPoint if has no Segments
        '''
        path = self.path()
        return self._pointForPathElement(element=path.elementAt(path.elementCount() - 1))

    def getStartPoint(self):
        '''
        Start point of a SegmentString is:
        - first element, regardless if has any Segments
        '''
        return self._pointForPathElement(element=self.path().elementAt(0))

    def _pointForPathElement(self, element):
        '''
        Return QPointF for QPathElements.
        QPathElements don't have a x() method
        Symptoms are "Exception: reverse not implemented"
        '''
        return QPointF(element.x, element.y)

    #
    # Responsibility: 2. maintain structure.
    #

    def appendSegments(self, segments, segmentCuspness):
        '''
        Append segments sequentially to end of self.

        segmentCuspness is [Bool,] equal in length to segments and tells whether each segment is a cusp.

        !!! The QPainterPath instance returned by QGraphicsPathItem.path() is a copy
        and when appended to does not change the display.
        IOW QGraphicsPathItem keeps a copy when you call setPath()

        FUTURE might be faster to union existing path with new path.
        '''
        # copy current path
        pathCopy = self.path()

        for segmentOrdinal, segment in enumerate(segments):
            # Index of a segment is the position of its first element in the path,
            # so it must be read before the segment is appended.
            indexOfSegmentInParent = pathCopy.elementCount()
            self._appendSegmentToPath(segment, pathCopy)
            if segmentCuspness[segmentOrdinal]:
                self.cuspness.setCuspness(indexOfSegmentInParent)

        # !!! pathCopy is NOT an alias for self.path() now, they differ. Hence:
        self.setPath(pathCopy)
        # No need to invalidate or update display, at least for Qt

        # TEST try to alter the path: has no effect, QPathElements are constants??
        #pathCopy.elementAt(1).x += 20
        #self.setPath(pathCopy)

    def _appendSegmentToPath(self, segment, path):
        '''
        Append internal representation of given Segment instance to given path.

        !!! All segments represented by QPathElement of ElementType:cubic i.e. curve
        !!! Cubic only wants the final three ControlPoints.
        '''
        path.cubicTo(*segment.asPoints()[1:])

    def segmentChanged(self, segment, indexOfSegmentInParent):
        ''' Given segment has changed. Propagate change to self. '''
        self.updateSegment(segment, indexOfSegmentInParent)

    def updateSegment(self, segment, indexOfSegmentInParent):
        '''
        Update drawable with changed segment.

        Understands that internal format self.path() is not updateable.
        Thus it is copy into new, with one changed segment in the middle.
        IE copies prefix, appends changed Segment, copies suffix.
        '''
        # startingPoint same as existing path
        # FIXME: what if user changes the starting controlPoint???
        newPath = QPainterPath(self.path().elementAt(0))
        for segmentIndex in self._segmentIndexIter():
            if segmentIndex == indexOfSegmentInParent:
                self._appendSegmentToPath(segment, newPath)
            else:
                self._copySegmentPathToPath(sourcePath=self.path(),
                                            destinationPath=newPath,
                                            segmentIndex=segmentIndex)
        # Assert SegmentString.getEndPoint is correct even case last segment updated
        self.setPath(newPath)

    def _segmentIndexIter(self):
        '''
        Generate indexes of segments.
        An index is NOT the ordinal.
        An index is the ordinal of the QPathElement of the first QPathElement for segment.
        Starts at 1, since here zeroeth QPathElement is a MoveTo.
        EG 1, 4, 7, 10, ...

        !!! Relies on all segments represented as 3-tuple curves.
        '''
        for ordinal in range(self.segmentCount()):
            yield ordinal * SegmentString.ELEMENTS_PER_SEGMENT + 1

    def segmentCount(self):
        '''
        Return count of segments, as an int.

        The path holds one MoveTo plus ELEMENTS_PER_SEGMENT elements per segment;
        floor division discards the MoveTo.
        Uses // so the result stays an int (usable by range()) under true division as well.
        '''
        return self.path().elementCount() // SegmentString.ELEMENTS_PER_SEGMENT

    def _copySegmentPathToPath(self, sourcePath, destinationPath, segmentIndex):
        ''' Use elements of a segment from sourcePath to append a segment to destinationPath. '''
        destinationPath.cubicTo(*self._pointsInPathForSegment(sourcePath, segmentIndex))

    def _pointsInPathForSegment(self, path, segmentIndex):
        '''
        Return list of QPointF for QPathElements of segment.
        !!! This is a 3-tuple, not sufficient for creating Segment
        '''
        return [self._pointForPathElement(element=path.elementAt(segmentIndex + offset))
                for offset in range(SegmentString.ELEMENTS_PER_SEGMENT)]

    #
    # Responsibility:
    # 3. Get getControlPointSet so user can manipulate them
    # 4. maintain relations between ControlPoints in ControlPointSet
    #

    def getControlPointSet(self):
        '''
        Instantiate for self:
        - ControlPoints
        - Segments
        - Relations (among ControlPoints)
        Returns list of ControlPoint.
        '''
        # NOT assert self.controlPointSet is None
        self.relations.clear()
        result = []
        previousEndControlPoint = None
        for segmentIndex in self._segmentIndexIter():
            segment = self._createSegmentAt(segmentIndex)
            result.extend(segment.controlPointIter())
            segment.createRelations(relations=self.relations,
                                    previousEndAnchor=previousEndControlPoint)
            previousEndControlPoint = segment.getEndControlPoint()

        self.controlPointSet = result  # Remember my own ControlPoint set
        # FIXME: above does NOT allow for many views of same SegmentString
        return result

    def _createSegmentAt(self, segmentIndex):
        '''
        Create Segment instance for what is described in path at segmentIndex.

        !!! Expand the run-encoding of QPainterPath
        (last point of previous segment shared with first point of next segment.)
        E.G. CurveSegment requires four points from three in the path.
        '''
        if segmentIndex == 1:
            # Only one prior element, a MoveTo
            startPoint = self.getStartPoint()
        else:
            # Last point of previous segment is first point of this segment
            previousIndex = segmentIndex - SegmentString.ELEMENTS_PER_SEGMENT
            startPoint = self._pointsInPathForSegment(self.path(), previousIndex)[-1]
        pointsFromPath = self._pointsInPathForSegment(self.path(), segmentIndex)
        segment = CurveSegment(startPoint, *pointsFromPath)
        # assert ControlPoints were created and refer to segment
        segment.setIndexInParent(parent=self, indexOfSegmentInParent=segmentIndex)
        return segment

    def clearTraversal(self):
        ''' Clear traversal flags to prepare for new traversal. '''
        for controlPoint in self.controlPointSet:
            controlPoint.setTraversed(False)

    #
    # Responsibility: 5. move control points
    #

    def moveRelated(self, controlPoint, deltaCoordinate, alternateMode):
        ''' Move (translate) controlPoint and set of related controlPoints. '''
        self.clearTraversal()  # movement by traversal of relations
        # delegate to strategy/policy
        self.actions.moveRelated(self.relations, controlPoint, deltaCoordinate, alternateMode)

    #
    # Responsibility: 6. maintain cusps and return cuspness of a segment
    #

    def isSegmentCusp(self, segmentIndex):
        ''' Return the stored cuspness of the segment at segmentIndex. '''
        return self.cuspness.isCusp(segmentIndex)

    def setSegmentCuspness(self, segmentIndex):
        ''' Mark the segment at segmentIndex as a cusp. '''
        self.cuspness.setCuspness(segmentIndex)

    #
    # TESTING: Reimplement paint() to help see segments. Not necessary for production use.
    #

    def paint(self, painter, styleOption, widget):
        ''' Reimplemented to paint elements in alternating colors '''
        path = self.path()  # alias
        pathEnd = None
        i = 0
        while True:
            try:
                element = path.elementAt(i)
                if element.isMoveTo():
                    pathEnd = QPointF(element.x, element.y)
                    i += 1
                elif element.isCurveTo():
                    # Gather curve data, since is spread across elements of type curveElementData
                    cp1 = QPointF(element.x, element.y)
                    element = path.elementAt(i + 1)
                    cp2 = QPointF(element.x, element.y)
                    element = path.elementAt(i + 2)
                    newEnd = QPointF(element.x, element.y)
                    # create a subpath, since painter has no drawCubic method
                    subpath = QPainterPath()
                    subpath.moveTo(pathEnd)
                    subpath.cubicTo(cp1, cp2, newEnd)
                    painter.drawPath(subpath)

                    pathEnd = newEnd
                    i += 3
                else:
                    # Single-argument print call: same output on Python 2 and 3.
                    print("unhandled path element %s" % (element.type,))
                    i += 1
                    # TODO: if SegmentStrings contain lines (w/o Direction ControlPoints)
                    # !!! We don't use QPathElements of type Line
                    # elif element.isLineTo():
                    #     newEnd = QPointF(element.x, element.y)
                    #     painter.drawLine(pathEnd, newEnd)
                    #     pathEnd = newEnd
                    #     i+=1
                if i >= path.elementCount():
                    break
            except Exception as inst:
                # Debug painter: report the problem and stop drawing rather than propagate.
                print(inst)
                break

            # Alternate colors
            if i % 2 == 1:
                painter.setPen(Qt.blue)
            else:
                painter.setPen(Qt.red)
def __init__(self, relations):
    """Store a private ``Relations`` built from a deep copy of `relations`.

    The copy means later changes to ``self.gold`` do not touch the object
    that was passed in.
    """
    private_copy = copy.deepcopy(relations)
    self.gold = Relations(private_copy)
class Oracle:
    """Chooses the next transition from a set of gold relations.

    ``self.gold`` exposes ``isRel(a, b)`` (label of the relation a -> b, or
    ``None``) plus the mappings ``children[a]`` and ``parents[b]`` holding
    ``(node, label)`` pairs.  Gold relations are removed as soon as an
    action accounting for them is returned, so every relation is used once.
    """

    def __init__(self, relations):
        # Work on a private deep copy: relations are deleted as they are used.
        self.gold = Relations(copy.deepcopy(relations))

    def _consume_gold_edge(self, parent, child, label):
        """Remove the gold relation ``parent -[label]-> child`` from both indexes."""
        self.gold.children[parent].remove((child, label))
        self.gold.parents[child].remove((parent, label))

    def reentrancy(self, node, found):
        """Find a sibling of `node` that gold relates to it.

        Siblings are the other children, in `found`, of the parents `node`
        has in `found`.

        Args:
            node: Node about to be reduced.
            found: Relations built so far; must expose ``parents`` and
                ``children`` mappings of ``(node, label)`` pairs.

        Returns:
            ``[sibling, label, siblings]`` for the first sibling ``s`` with
            a gold relation ``node -> s`` (that relation is consumed), or
            ``None`` when there is none.
        """
        siblings = [
            item[0]
            for p in found.parents[node]
            for item in found.children[p[0]]
            if item[0] != node
        ]
        for s in siblings:
            label = self.gold.isRel(node, s)
            if label is not None:
                self._consume_gold_edge(node, s, label)
                return [s, label, siblings]
        return None

    def valid_actions(self, state):
        """Return the next action for `state`, or ``None`` if nothing applies.

        Checked in this order:

        1. ``larc``   -- gold has a relation top -> second stack element.
        2. ``rarc``   -- gold has a relation second stack element -> top.
        3. ``reduce`` -- the stack is not empty and no node of any buffer
           token is related to the top in gold (either direction); the
           argument is the result of :meth:`reentrancy`.
        4. ``shift``  -- the buffer is not empty; the argument is a
           ``Subgraph`` of the next token's nodes with the gold relations
           among them (which are consumed).
        """
        top = state.stack.top()
        other = state.stack.get(1)

        label = self.gold.isRel(top, other)
        if label is not None:
            self._consume_gold_edge(top, other, label)
            return Action("larc", label)

        label = self.gold.isRel(other, top)
        if label is not None:
            self._consume_gold_edge(other, top, label)
            return Action("rarc", label)

        if not state.stack.isEmpty():
            # Stop at the first buffer node still related to the top.
            top_still_needed = any(
                self.gold.isRel(top, node) is not None
                or self.gold.isRel(node, top) is not None
                for item in state.buffer.tokens
                for node in item.nodes
            )
            if not top_still_needed:
                return Action("reduce", self.reentrancy(top, state.stack.relations))

        if not state.buffer.isEmpty():
            token = state.buffer.peek()
            nodes = token.nodes
            relations = []
            for n1 in nodes:
                for n2 in nodes:
                    if n1 != n2:
                        # Same order as before: n1 -> n2 first, then n2 -> n1.
                        for parent, target in ((n1, n2), (n2, n1)):
                            # Iterate over a snapshot because matching
                            # entries are removed from the live list.
                            # NOTE(review): the deep copy means matching
                            # relies on node equality, not identity; kept
                            # as-is because the node type is not visible here.
                            snapshot = copy.deepcopy(self.gold.children[parent])
                            for (child, label) in snapshot:
                                if child == target:
                                    relations.append((parent, target, label))
                                    self._consume_gold_edge(parent, child, label)
            subgraph = Subgraph(nodes, relations)
            return Action("shift", subgraph)

        return None