def GetTopicModel(self, model=None, args=None, class_ids=None, topic_names=None,
                  use_sparse_format=None, request_type=None):
    """Request a topic model from the native library and return it parsed.

    Builds (or updates) a ``GetTopicModelArgs`` message, sends it to the
    library through ``ArtmRequestTopicModel`` and copies the response out
    with ``ArtmCopyRequestResult``.

    Args:
        model: Optional object exposing ``name()``; when given, its name is
            written to ``args.model_name``.
        args: Optional ``messages_pb2.GetTopicModelArgs``. A new message is
            created when omitted. NOTE: a caller-supplied message is
            modified in place by the overrides below.
        class_ids: Optional iterable; when given, replaces the contents of
            ``args.class_id``.
        topic_names: Optional iterable; when given, replaces the contents of
            ``args.topic_name``.
        use_sparse_format: Optional value copied to
            ``args.use_sparse_format``.
        request_type: Optional value copied to ``args.request_type``.

    Returns:
        A ``messages_pb2.TopicModel`` parsed from the bytes the library
        copied into the result buffer.
    """
    if args is None:
        args = messages_pb2.GetTopicModelArgs()

    def _replace_repeated(field_name, values):
        # Drop whatever the repeated field held before, then refill it
        # element by element from the caller-provided values.
        args.ClearField(field_name)
        container = getattr(args, field_name)
        for value in values:
            container.append(value)

    if model is not None:
        args.model_name = model.name()
    if class_ids is not None:
        _replace_repeated('class_id', class_ids)
    if topic_names is not None:
        _replace_repeated('topic_name', topic_names)
    if use_sparse_format is not None:
        args.use_sparse_format = use_sparse_format
    if request_type is not None:
        args.request_type = request_type

    # Step 1: submit the serialized request. The value coming back from
    # HandleErrorCode is used as the size of the pending result.
    request_blob = args.SerializeToString()
    request_status = self.lib_.ArtmRequestTopicModel(
        self.id_, len(request_blob), request_blob)
    result_size = HandleErrorCode(self.lib_, request_status)

    # Step 2: copy the pending result into a buffer of exactly that size.
    result_buffer = ctypes.create_string_buffer(result_size)
    copy_status = self.lib_.ArtmCopyRequestResult(result_size, result_buffer)
    HandleErrorCode(self.lib_, copy_status)

    # Step 3: decode the copied bytes into a TopicModel message.
    parsed_model = messages_pb2.TopicModel()
    parsed_model.ParseFromString(result_buffer)
    return parsed_model
def GetTopicModel(self, model):
    """Fetch the topic model identified by ``model.name()`` from the library.

    Args:
        model: Object exposing ``name()``; the returned name is passed to
            ``ArtmRequestTopicModel`` together with ``self.id_``.

    Returns:
        A ``messages_pb2.TopicModel`` parsed from the bytes copied out of
        the library by ``ArtmCopyRequestResult``.
    """
    # Ask the library to prepare the model; the value returned by
    # HandleErrorCode is used as the size of the pending result.
    request_status = self.lib_.ArtmRequestTopicModel(self.id_, model.name())
    result_size = HandleErrorCode(self.lib_, request_status)

    # Copy the prepared result into a buffer of exactly that size.
    result_buffer = ctypes.create_string_buffer(result_size)
    copy_status = self.lib_.ArtmCopyRequestResult(result_size, result_buffer)
    HandleErrorCode(self.lib_, copy_status)

    parsed_model = messages_pb2.TopicModel()
    parsed_model.ParseFromString(result_buffer)
    return parsed_model
def Overwrite(self, topic_model, commit=True):
    """Send ``topic_model`` to the library under this model's own name.

    The input message is not modified: a copy is made and only the copy's
    ``name`` is replaced with ``self.name()`` before serialization.

    Args:
        topic_model: ``messages_pb2.TopicModel`` whose contents are sent.
        commit: When true (the default), wait for the master component to
            become idle and then call ``Synchronize`` with
            ``decay_weight=0.0``, ``apply_weight=1.0`` and
            ``invoke_regularizers=False``.
    """
    renamed_model = messages_pb2.TopicModel()
    renamed_model.CopyFrom(topic_model)
    renamed_model.name = self.name()

    payload = renamed_model.SerializeToString()
    payload_buffer = ctypes.create_string_buffer(payload)
    # The length passed is len(payload), i.e. it excludes the trailing NUL
    # that create_string_buffer appends.
    status = self.lib_.ArtmOverwriteTopicModel(
        self.master_id_, len(payload), payload_buffer)
    HandleErrorCode(self.lib_, status)

    if not commit:
        return

    self.master_component.WaitIdle()
    self.Synchronize(decay_weight=0.0, apply_weight=1.0,
                     invoke_regularizers=False)
# Describe the model: number of topics, inner iterations, and the scores
# that should be attached to it.
model_config = messages_pb2.ModelConfig()
model_config.topics_count = topics_count
model_config.inner_iterations_count = inner_iterations_count
model_config.score_name.extend([
    perplexity_score_name,
    sparsity_theta_score_name,
    sparsity_phi_score_name,
    topic_kernel_score_name,
])
# Regularizers are left disabled here; the lines below show how they would
# be attached.
# model_config.regularizer_name.append(regularizer_name_theta)
# model_config.regularizer_tau.append(0.1)
# model_config.regularizer_name.append(regularizer_name_decor)
# model_config.regularizer_tau.append(200000)
model = master_component.CreateModel(model_config)

# Build an initial topic model with one row of pseudo-random weights per
# token. The fixed seed makes the generated weights reproducible.
initial_topic_model = messages_pb2.TopicModel()
initial_topic_model.topics_count = topics_count
initial_topic_model.name = model.name()

random.seed(123)
for entry in unique_tokens.entry:
    token = entry.key_token
    initial_topic_model.token.append(token)
    weights = initial_topic_model.token_weights.add()
    for topic_index in range(topics_count):
        weights.value.append(random.random())

model.Overwrite(initial_topic_model)

# NOTE(review): `iter` shadows the builtin; the name is kept because code
# beyond this chunk may still read it.
for iter in range(outer_iteration_count):
    master_component.InvokeIteration(1)
    # presumably a timeout in milliseconds - confirm against WaitIdle
    master_component.WaitIdle(120000)