class EdgeSubLabel(object):
    """Hold meta informations of a single relationship.

    i.e. src_label -> edge_label -> dst_label

    The source and destination endpoints are each described by a vid
    (a column index as ``int`` or a column name as ``str``) and an optional
    vertex label.
    """

    def __init__(
        self,
        loader,
        properties=None,
        source=None,
        destination=None,
        load_strategy="both_out_in",
    ):
        """Create the description of one (source label, destination label) pair.

        Args:
            loader: A ``Loader`` instance, or any object that ``Loader``
                accepts in its constructor.
            properties: Raw property specs given by the user; each item is
                either a name or a ``(name, dtype)`` pair. Optional.
            source: Source endpoint, see :meth:`resolve_src_dst_value`.
                Defaults to column index ``0`` with an empty label.
            destination: Destination endpoint, see
                :meth:`resolve_src_dst_value`. Defaults to column index ``1``
                with an empty label.
            load_strategy: One of ``"only_out"``, ``"only_in"`` or
                ``"both_out_in"``.

        Raises:
            SyntaxError: If one endpoint is given by index and the other
                by name.
        """
        if isinstance(loader, Loader):
            self.loader = loader
        else:
            self.loader = Loader(loader)

        self.raw_properties = properties
        self.properties = []
        self.source_vid = 0
        self.source_label = ""
        self.destination_vid = 1
        self.destination_label = ""
        self.load_strategy = ""
        # Set once finish() has run, so a repeated call is a no-op.
        self._finished = False

        if source is not None:
            self.set_source(source)
        if destination is not None:
            self.set_destination(destination)

        # Both endpoints must be addressed the same way: index or name.
        src_is_index = isinstance(self.source_vid, int)
        dst_is_index = isinstance(self.destination_vid, int)
        src_is_name = isinstance(self.source_vid, str)
        dst_is_name = isinstance(self.destination_vid, str)
        if (src_is_index and dst_is_name) or (src_is_name and dst_is_index):
            raise SyntaxError(
                "Source vid and destination vid must have same formats, both use name or both use index"
            )

        self.set_load_strategy(load_strategy)

    def finish(self, id_type: str):
        """Build the final property list and finalize the loader.

        The source and destination id columns are always the first two
        entries of ``self.properties``, followed by the user supplied
        properties or, when none were given, the ones deduced by the loader.

        Calling this method again after it completed does nothing; without
        that guard the id columns and properties would be appended twice.

        Args:
            id_type: Type name used for both endpoint id columns.
        """
        if self._finished:
            return

        self.add_property(str(self.source_vid), id_type)
        self.add_property(str(self.destination_vid), id_type)

        if self.raw_properties:
            self.add_properties(self.raw_properties)
        elif self.loader.deduced_properties:
            self.add_properties(self.loader.deduced_properties)

        self.loader.select_columns(
            self.properties, include_all=bool(not self.raw_properties)
        )
        self.loader.finish()
        self._finished = True

    def __str__(self) -> str:
        return (
            "\ntype: EdgeSubLabel"
            "\nsource_label: {}"
            "\ndestination_label: {}"
            "\nproperties: {}"
            "\nloader: {!r}"
        ).format(
            self.source_label,
            self.destination_label,
            self.properties,
            self.loader,
        )

    def __repr__(self) -> str:
        return self.__str__()

    @staticmethod
    def resolve_src_dst_value(value: Union[int, str, Tuple[Union[int, str], str]]):
        """Resolve the edge's source and destination.

        Args:
            value (Union[int, str, Tuple[Union[int, str], str]]):
                1. an int, representing the vid column index, or a str,
                   representing the vid column name.
                2. a ([int/str], str) pair. The former represents the vid,
                   the latter represents the label.

        Returns:
            A ``(vid, label)`` pair. ``label`` is ``""`` when only a vid
            was given.

        Raises:
            InvalidArgumentError: If ``value`` is neither a vid nor a
                sequence. Failed ``check_argument`` validations (decimal
                column name, wrong length, non-str label) are reported by
                that helper.
        """
        if isinstance(value, (int, str)):
            # A decimal string would be ambiguous with a column index.
            check_argument(
                isinstance(value, int) or not value.isdecimal(),
                "Column name cannot be decimal",
            )
            return value, ""

        if isinstance(value, Sequence):
            check_argument(len(value) == 2)
            vid, label = value[0], value[1]
            check_argument(
                isinstance(vid, int)
                or (isinstance(vid, str) and not vid.isdecimal()),
                "Column name cannot be decimal",
            )
            check_argument(isinstance(label, str), "Label must be str")
            return vid, label

        raise InvalidArgumentError(
            "Source / destination format incorrect. Expect vid or [vid, source_label]"
        )

    def set_source(self, source: Union[int, str, Tuple[Union[int, str], str]]):
        """Set the source vid and label from ``source``."""
        self.source_vid, self.source_label = self.resolve_src_dst_value(source)

    def set_destination(self, destination: Union[int, str, Tuple[Union[int, str], str]]):
        """Set the destination vid and label from ``destination``."""
        self.destination_vid, self.destination_label = self.resolve_src_dst_value(
            destination
        )

    def set_load_strategy(self, strategy: str):
        """Validate and store the load strategy.

        The message is built with ``format`` so that a non-str ``strategy``
        is reported through ``check_argument`` instead of failing earlier
        with a ``TypeError`` from string concatenation.
        """
        check_argument(
            strategy in ("only_out", "only_in", "both_out_in"),
            "invalid load strategy: {}".format(strategy),
        )
        self.load_strategy = strategy

    def add_property(self, prop: str, dtype=None) -> None:
        """prop is a str, representing name. It can optionally have a type."""
        self.properties.append((prop, utils.unify_type(dtype)))

    def add_properties(self, properties: Sequence) -> None:
        """Add several properties, each a name or a ``(name, dtype)`` pair."""
        for prop in properties:
            if isinstance(prop, str):
                self.add_property(prop)
            else:
                self.add_property(prop[0], prop[1])

    def get_attr(self):
        """Serialize this sub label into an ``attr_value_pb2.NameAttrList``."""
        attr_list = attr_value_pb2.NameAttrList()
        attr_list.name = "{}_{}".format(self.source_label, self.destination_label)
        attr_list.attr[types_pb2.SRC_LABEL].CopyFrom(
            utils.s_to_attr(self.source_label)
        )
        attr_list.attr[types_pb2.DST_LABEL].CopyFrom(
            utils.s_to_attr(self.destination_label)
        )
        attr_list.attr[types_pb2.LOAD_STRATEGY].CopyFrom(
            utils.s_to_attr(self.load_strategy)
        )
        attr_list.attr[types_pb2.SRC_VID].CopyFrom(
            utils.s_to_attr(str(self.source_vid))
        )
        attr_list.attr[types_pb2.DST_VID].CopyFrom(
            utils.s_to_attr(str(self.destination_vid))
        )
        attr_list.attr[types_pb2.LOADER].CopyFrom(self.loader.get_attr())

        props = []
        # The first two entries are the src/dst id columns added by finish();
        # they are already carried by SRC_VID / DST_VID above.
        for prop in self.properties[2:]:
            prop_attr = attr_value_pb2.NameAttrList()
            prop_attr.name = prop[0]
            prop_attr.attr[0].CopyFrom(utils.type_to_attr(prop[1]))
            props.append(prop_attr)
        attr_list.attr[types_pb2.PROPERTIES].list.func.extend(props)
        return attr_list
class VertexLabel(object):
    """Describe one vertex label: its loader, id column and properties."""

    def __init__(
        self,
        label: str,
        loader: Any,
        properties: Sequence = None,
        vid_field: Union[str, int] = 0,
        session_id=None,
        id_type: str = "int64_t",
        vformat=None,
    ):
        """Store the label settings and normalize its property list.

        Args:
            label: Name of the vertex label.
            loader: A ``Loader`` instance, or any object that ``Loader``
                accepts in its constructor.
            properties: Property specs given by the user; each item is
                either a name or a ``(name, dtype)`` pair.
            vid_field: Column index or property name used as the id field.
            session_id: Stored as ``_session_id``; not used in this class.
            id_type: Type name of the original vertex id.
            vformat: Optional format string emitted by :meth:`attr`.
        """
        self.label = label
        # Accept either a ready-made Loader or a raw data source.
        self.loader = loader if isinstance(loader, Loader) else Loader(loader)
        # Properties exactly as the caller passed them.
        self.raw_properties = properties
        # Normalized (name, dtype) pairs used to construct the graph.
        self.properties = []
        self.vid_field = vid_field
        # Must be consistent with the id type of the original graph.
        self.id_type = id_type
        self._session_id = session_id
        self._vformat = vformat

        # The id column always comes first in the property list.
        self.add_property(str(self.vid_field), self.id_type)
        if self.raw_properties:
            self.add_properties(self.raw_properties)
        elif self.loader.deduced_properties:
            self.add_properties(self.loader.deduced_properties)

        # Tell the loader which columns were selected.
        self.loader.select_columns(
            self.properties, include_all=self.raw_properties is None
        )

    def __str__(self) -> str:
        pieces = (
            "\ntype: VertexLabel",
            "\nlabel: ",
            self.label,
            "\nproperties: ",
            str(self.properties),
            "\nvid: ",
            str(self.vid_field),
            "\nid_type: ",
            self.id_type,
            "\nloader: ",
            repr(self.loader),
        )
        return "".join(pieces)

    def __repr__(self) -> str:
        return self.__str__()

    def add_property(self, prop: str, dtype=None) -> None:
        """Append one property name together with its unified type."""
        entry = (prop, utils.unify_type(dtype))
        self.properties.append(entry)

    def add_properties(self, properties: Sequence) -> None:
        """Append several properties, each a name or a ``(name, dtype)`` pair."""
        for item in properties:
            if not isinstance(item, str):
                self.add_property(item[0], item[1])
                continue
            self.add_property(item)

    def attr(self) -> Sequence[attr_value_pb2.Chunk]:
        """Serialize this label into a one-element list of chunks."""
        chunk = attr_value_pb2.Chunk()
        string_attrs = (
            (types_pb2.CHUNK_NAME, "vertex"),
            (types_pb2.CHUNK_TYPE, "loader"),
            (types_pb2.LABEL, self.label),
            (types_pb2.VID, str(self.vid_field)),
        )
        for key, text in string_attrs:
            chunk.attr[key].CopyFrom(utils.s_to_attr(text))

        if isinstance(self._vformat, str):
            chunk.attr[types_pb2.VFORMAT].CopyFrom(utils.s_to_attr(self._vformat))

        # Copy the loader attributes; the VALUES entry holds the raw bytes
        # for pandas/numpy data and goes into the chunk buffer instead.
        for key, value in self.loader.get_attr().items():
            if key != types_pb2.VALUES:
                chunk.attr[key].CopyFrom(value)
            else:
                chunk.buffer = value
        return [chunk]
class EdgeSubLabel(object):
    """Hold meta informations of a single relationship.

    i.e. src_label -> edge_label -> dst_label
    """

    def __init__(
        self,
        loader,
        properties=None,
        src_label: str = "_",
        dst_label: str = "_",
        src_field: Union[str, int] = 0,
        dst_field: Union[str, int] = 1,
        load_strategy="both_out_in",
    ):
        """Create the description of one (source label, destination label) pair.

        Args:
            loader: A ``Loader`` instance, or any object that ``Loader``
                accepts in its constructor.
            properties: Raw property specs given by the user; each item is
                either a name or a ``(name, dtype)`` pair. Optional.
            src_label: Label of the source vertices.
            dst_label: Label of the destination vertices.
            src_field: Column index or column name of the source id.
            dst_field: Column index or column name of the destination id.
            load_strategy: One of ``"only_out"``, ``"only_in"`` or
                ``"both_out_in"``.

        Raises:
            SyntaxError: If one of ``src_field`` / ``dst_field`` is an index
                and the other is a name.
        """
        if isinstance(loader, Loader):
            self.loader = loader
        else:
            self.loader = Loader(loader)

        self.raw_properties = properties
        self.properties = []
        self.src_label = src_label
        self.dst_label = dst_label
        self.src_field = src_field
        self.dst_field = dst_field
        # Set once finish() has run, so a repeated call is a no-op.
        self._finished = False

        # Build the message with format(): concatenating a non-str strategy
        # would raise TypeError before check_argument could report it.
        check_argument(
            load_strategy in ("only_out", "only_in", "both_out_in"),
            "invalid load strategy: {}".format(load_strategy),
        )
        self.load_strategy = load_strategy

        # Both endpoints must be addressed the same way: index or name.
        src_is_index = isinstance(self.src_field, int)
        dst_is_index = isinstance(self.dst_field, int)
        src_is_name = isinstance(self.src_field, str)
        dst_is_name = isinstance(self.dst_field, str)
        if (src_is_index and dst_is_name) or (src_is_name and dst_is_index):
            # The offending values go into the exception instead of being
            # printed to stdout.
            raise SyntaxError(
                "Source vid and destination vid must have same formats, both use name or both use index"
                " (src_field={!r}, dst_field={!r})".format(
                    self.src_field, self.dst_field
                )
            )

    def finish(self, id_type: str = "int64_t"):
        """Build the final property list and finalize the loader.

        The source and destination id columns are always the first two
        entries of ``self.properties``, followed by the user supplied
        properties or, when none were given, the ones deduced by the loader.
        Calling this method again after it completed does nothing.

        Args:
            id_type: Type name used for both endpoint id columns.
        """
        if self._finished:
            return

        self.add_property(str(self.src_field), id_type)
        self.add_property(str(self.dst_field), id_type)

        if self.raw_properties:
            self.add_properties(self.raw_properties)
        elif self.loader.deduced_properties:
            self.add_properties(self.loader.deduced_properties)

        self.loader.select_columns(
            self.properties, include_all=bool(not self.raw_properties)
        )
        self.loader.finish()
        self._finished = True

    def __str__(self) -> str:
        return (
            "\ntype: EdgeSubLabel"
            "\nsource_label: {}"
            "\ndestination_label: {}"
            "\nproperties: {}"
            "\nloader: {!r}"
        ).format(self.src_label, self.dst_label, self.properties, self.loader)

    def __repr__(self) -> str:
        return self.__str__()

    def add_property(self, prop: str, dtype=None) -> None:
        """prop is a str, representing name. It can optionally have a type."""
        self.properties.append((prop, utils.unify_type(dtype)))

    def add_properties(self, properties: Sequence) -> None:
        """Add several properties, each a name or a ``(name, dtype)`` pair."""
        for prop in properties:
            if isinstance(prop, str):
                self.add_property(prop)
            else:
                self.add_property(prop[0], prop[1])

    def get_attr(self):
        """Serialize this sub label into an ``attr_value_pb2.NameAttrList``."""
        attr_list = attr_value_pb2.NameAttrList()
        attr_list.name = "{}_{}".format(self.src_label, self.dst_label)
        attr_list.attr[types_pb2.SRC_LABEL].CopyFrom(utils.s_to_attr(self.src_label))
        attr_list.attr[types_pb2.DST_LABEL].CopyFrom(utils.s_to_attr(self.dst_label))
        attr_list.attr[types_pb2.LOAD_STRATEGY].CopyFrom(
            utils.s_to_attr(self.load_strategy)
        )
        attr_list.attr[types_pb2.SRC_VID].CopyFrom(utils.s_to_attr(str(self.src_field)))
        attr_list.attr[types_pb2.DST_VID].CopyFrom(utils.s_to_attr(str(self.dst_field)))
        attr_list.attr[types_pb2.LOADER].CopyFrom(self.loader.get_attr())

        props = []
        # The first two entries are the src/dst id columns added by finish();
        # they are already carried by SRC_VID / DST_VID above.
        for prop in self.properties[2:]:
            prop_attr = attr_value_pb2.NameAttrList()
            prop_attr.name = prop[0]
            prop_attr.attr[0].CopyFrom(utils.type_to_attr(prop[1]))
            props.append(prop_attr)
        attr_list.attr[types_pb2.PROPERTIES].list.func.extend(props)
        return attr_list
class EdgeSubLabel(object):
    """Hold meta informations of a single relationship.

    i.e. src_label -> edge_label -> dst_label
    """

    def __init__(
        self,
        loader,
        properties=None,
        src_label: str = "_",
        dst_label: str = "_",
        src_field: Union[str, int] = 0,
        dst_field: Union[str, int] = 1,
        load_strategy="both_out_in",
        id_type: str = "int64_t",
        eformat=None,
    ):
        """Create the description of one (source label, destination label) pair.

        Args:
            loader: A ``Loader`` instance, or any object that ``Loader``
                accepts in its constructor.
            properties: Raw property specs given by the user; each item is
                either a name or a ``(name, dtype)`` pair. Optional.
            src_label: Label of the source vertices.
            dst_label: Label of the destination vertices.
            src_field: Column index or column name of the source id.
            dst_field: Column index or column name of the destination id.
            load_strategy: One of ``"only_out"``, ``"only_in"`` or
                ``"both_out_in"``.
            id_type: Type name of the original vertex id, used for both
                endpoint id columns.
            eformat: Optional format string emitted by :meth:`get_attr`.

        Raises:
            SyntaxError: If one of ``src_field`` / ``dst_field`` is an index
                and the other is a name.
        """
        if isinstance(loader, Loader):
            self.loader = loader
        else:
            self.loader = Loader(loader)

        # raw properties passed by user parameters
        self.raw_properties = properties
        # finally properties for constructing graph
        self.properties = []
        # type of vertex original id
        # should be consistent with the original graph
        self.id_type = id_type

        self.src_label = src_label
        self.dst_label = dst_label
        self.src_field = src_field
        self.dst_field = dst_field

        # Validate the load strategy. The message is built with format():
        # concatenating a non-str strategy would raise TypeError before
        # check_argument could report it.
        check_argument(
            load_strategy in ("only_out", "only_in", "both_out_in"),
            "invalid load strategy: {}".format(load_strategy),
        )
        self.load_strategy = load_strategy

        # Both endpoints must be addressed the same way: index or name.
        src_is_index = isinstance(self.src_field, int)
        dst_is_index = isinstance(self.dst_field, int)
        src_is_name = isinstance(self.src_field, str)
        dst_is_name = isinstance(self.dst_field, str)
        if (src_is_index and dst_is_name) or (src_is_name and dst_is_index):
            # The offending values go into the exception instead of being
            # printed to stdout.
            raise SyntaxError(
                "Source vid and destination vid must have same formats, both use name or both use index"
                " (src_field={!r}, dst_field={!r})".format(
                    self.src_field, self.dst_field
                )
            )

        # normalize properties
        # add src/dst to property list
        self.add_property(str(self.src_field), self.id_type)
        self.add_property(str(self.dst_field), self.id_type)
        if self.raw_properties:
            self.add_properties(self.raw_properties)
        elif self.loader.deduced_properties:
            self.add_properties(self.loader.deduced_properties)

        # set selected columns to loader
        self.loader.select_columns(
            self.properties, include_all=bool(self.raw_properties is None)
        )

        self._eformat = eformat

    def __str__(self) -> str:
        return (
            "\ntype: EdgeSubLabel"
            "\nsource_label: {}"
            "\ndestination_label: {}"
            "\nproperties: {}"
            "\nloader: {!r}"
        ).format(self.src_label, self.dst_label, self.properties, self.loader)

    def __repr__(self) -> str:
        return self.__str__()

    def add_property(self, prop: str, dtype=None) -> None:
        """prop is a str, representing name. It can optionally have a type."""
        self.properties.append((prop, utils.unify_type(dtype)))

    def add_properties(self, properties: Sequence) -> None:
        """Add several properties, each a name or a ``(name, dtype)`` pair."""
        for prop in properties:
            if isinstance(prop, str):
                self.add_property(prop)
            else:
                self.add_property(prop[0], prop[1])

    def get_attr(self) -> attr_value_pb2.Chunk:
        """Serialize this sub label into a single ``attr_value_pb2.Chunk``."""
        chunk = attr_value_pb2.Chunk()
        chunk.attr[types_pb2.SUB_LABEL].CopyFrom(
            utils.s_to_attr("{}_{}".format(self.src_label, self.dst_label))
        )
        chunk.attr[types_pb2.SRC_LABEL].CopyFrom(utils.s_to_attr(self.src_label))
        chunk.attr[types_pb2.DST_LABEL].CopyFrom(utils.s_to_attr(self.dst_label))
        chunk.attr[types_pb2.LOAD_STRATEGY].CopyFrom(
            utils.s_to_attr(self.load_strategy)
        )
        chunk.attr[types_pb2.SRC_VID].CopyFrom(utils.s_to_attr(str(self.src_field)))
        chunk.attr[types_pb2.DST_VID].CopyFrom(utils.s_to_attr(str(self.dst_field)))
        if isinstance(self._eformat, str):
            chunk.attr[types_pb2.EFORMAT].CopyFrom(utils.s_to_attr(self._eformat))

        # loader
        for k, v in self.loader.get_attr().items():
            # raw bytes for pandas/numpy data
            if k == types_pb2.VALUES:
                chunk.buffer = v
            else:
                chunk.attr[k].CopyFrom(v)
        return chunk