def __init__(self, config, distance_metric):
    """Keep the configuration and distance metric and attach a file logger.

    Args:
        config: Configuration object queried as ``config.get(section, option)``;
            the ``GENERAL``/``logfile`` option is handed to
            ``logger.add_log_file``.
        distance_metric: Stored unchanged on ``self.distance_metric``.
    """
    # Starts out empty; nothing in this constructor fills it.
    self.clustring_results = []
    self.config, self.distance_metric = config, distance_metric

    # Logger is named "<module>.<class>"; ``__class__`` is the class this
    # method is defined in.
    self.log = logging.getLogger(".".join((__name__, __class__.__name__)))
    log_path = config.get("GENERAL", "logfile")
    self.log = logger.add_log_file(self.log, log_path)
def __init__(self, config, documents, external_vec):
    """Store the inputs, read the TF-IDF word limit and attach a file logger.

    Args:
        config: Configuration object queried as ``config.get(section, option)``.
            Reads ``TF-IDF``/``num_of_words_per_doc`` (converted to ``int``)
            and ``GENERAL``/``logfile``.
        documents: Stored unchanged on ``self.documents``.
        external_vec: Stored unchanged on ``self.external_vec``.
    """
    self.config = config
    self.documents = documents

    words_per_doc = config.get("TF-IDF", "num_of_words_per_doc")
    self.num_of_words_per_doc = int(words_per_doc)

    self.external_vec = external_vec
    self.stage = 1  # initial value; its meaning is defined outside this method

    # Logger is named "<module>.<class>"; ``__class__`` is the class this
    # method is defined in.
    self.log = logging.getLogger(".".join((__name__, __class__.__name__)))
    log_path = config.get("GENERAL", "logfile")
    self.log = logger.add_log_file(self.log, log_path)

    # NOTE(review): ``build_chunks`` is not a parameter; it is looked up in
    # the enclosing/module scope -- confirm where it is defined.
    self.build_chunks = build_chunks
def __init__(self, filepath, config, id):
    """Attach a logger, load the document text and read chunking settings.

    Args:
        filepath: Path of the document; stored on ``self.docPath`` and passed
            to ``self.getText`` to obtain ``self.docText``.
        config: Configuration object queried as ``config.get(section, option)``.
            Reads ``GENERAL``/``logfile`` and the ``CHUNKS`` options ``size``
            and ``delay`` (both converted to ``int``).
        id: Identifier stored on ``self.docID``.
    """
    # The logger is created first, before ``getText`` runs.
    # It is named "<module>.<class>".
    self.log = logging.getLogger(".".join((__name__, __class__.__name__)))
    log_path = config.get("GENERAL", "logfile")
    self.log = logger.add_log_file(self.log, log_path)

    self.docID, self.docPath = id, filepath
    self.basename = os.path.basename(filepath)
    self.docText = self.getText(self.docPath)

    # Chunk bookkeeping; the list starts empty and no cluster is assigned.
    self.chunks = []
    chunk_size = config.get("CHUNKS", "size")
    self.chunkSize = int(chunk_size)
    chunk_delay = config.get("CHUNKS", "delay")
    self.DelayPar = int(chunk_delay)
    self.cluster = None
def __init__(self, config, documents, number_of_clusters, silhouette_width, max_chunks_in_doc):
    """Record the given clustering figures and attach a file logger.

    Args:
        config: Configuration object queried as ``config.get(section, option)``;
            the ``GENERAL``/``logfile`` option is handed to
            ``logger.add_log_file``.
        documents: Stored unchanged on ``self.documents``.
        number_of_clusters: Stored on ``self.number_of_chunks_styles``.
        silhouette_width: Stored on ``self.silhouette_width`` after rounding
            to 6 decimal places.
        max_chunks_in_doc: Stored unchanged on ``self.max_chunks_in_doc``.
    """
    self.config, self.documents = config, documents
    self.max_chunks_in_doc = max_chunks_in_doc
    self.number_of_chunks_styles = number_of_clusters

    rounded_width = round(silhouette_width, 6)
    self.silhouette_width = rounded_width

    # Per-style document statistics are not known at construction time.
    self.max_docs_in_style = None
    self.min_docs_in_style = None
    self.number_of_documents_styles = 0

    # Logger is named "<module>.<class>"; ``__class__`` is the class this
    # method is defined in.
    self.log = logging.getLogger(".".join((__name__, __class__.__name__)))
    log_path = config.get("GENERAL", "logfile")
    self.log = logger.add_log_file(self.log, log_path)
def __init__(self, config, documents):
    """Initialise empty working containers and attach a file logger.

    Also resets the ``vectors_dict`` and ``corr_matrix`` attributes on the
    ``Distance_Matric`` class itself, so the reset is visible to every
    instance of that class.

    Args:
        config: Configuration object queried as ``config.get(section, option)``;
            the ``GENERAL``/``logfile`` option is handed to
            ``logger.add_log_file``.
        documents: Stored unchanged on ``self.documents``.
    """
    self.config = config
    self.documents = documents

    # Each container is a distinct empty object (no sharing between them).
    self.distance_metric = []
    self.comparable_chunks = []
    self.chunks_index = {}
    self.transferred_vectors = []
    self.chebyshev_mat = []

    # Class-level state, cleared every time an instance is constructed.
    Distance_Matric.vectors_dict = {}
    Distance_Matric.corr_matrix = None

    # Logger is named "<module>.<class>"; ``__class__`` is the class this
    # method is defined in.
    self.log = logging.getLogger(".".join((__name__, __class__.__name__)))
    log_path = config.get("GENERAL", "logfile")
    self.log = logger.add_log_file(self.log, log_path)
def __init__(self, config, text, docID, chunkID, model, preChunks=None):
    """Store the chunk's inputs, attach a file logger and build its vector.

    Args:
        config: Configuration object queried as ``config.get(section, option)``.
            Reads ``CHUNKS``/``size``, ``CHUNKS``/``delay`` and
            ``GENERAL``/``logfile``.
        text: Stored on ``self.Doc``.
        docID: Stored on ``self.docID``.
        chunkID: Stored on ``self.chunkID``.
        model: Stored on ``self.model``.
        preChunks: Optional list stored on ``self.preChunks``. When omitted
            (or ``None``) a fresh empty list is created for this instance.
    """
    # NOTE(review): these two values are stored exactly as ``config.get``
    # returns them (no ``int`` conversion here) -- confirm that is intended.
    self.chunk_size = config.get("CHUNKS", "size")
    self.delay = config.get("CHUNKS", "delay")

    # Logger is named "<module>.<class>"; ``__class__`` is the class this
    # method is defined in.
    self.log = logging.getLogger(__name__ + "." + __class__.__name__)
    self.log = logger.add_log_file(self.log, config.get("GENERAL", "logfile"))

    self.chunkID = chunkID
    self.chunkVec = None
    self.Doc = text
    self.docID = docID
    self.model = model
    self.cluster = None

    # A ``[]`` default would be created once and shared by every instance
    # constructed without ``preChunks``; build a new list per instance instead.
    self.preChunks = [] if preChunks is None else preChunks
    self.ranked_vec = {}

    # Invoked explicitly through ``Chunk`` (not ``self``), as in the original.
    Chunk.createVec(self)
def __init__(self, config, filepath=None, documents=None):
    """Read the Word2Vec settings, attach a file logger and load sentences.

    Args:
        config: Configuration object queried as ``config.get(section, option)``.
            Reads the ``Word2Vec`` options ``arch``, ``training_model``,
            ``context_window``, ``dimension`` and ``text_delimiter``, plus
            ``GENERAL``/``logfile``.
        filepath: Stored on ``self.file_path``; defaults to ``None``.
        documents: Optional collection passed to ``self.set_sentences`` when
            it is truthy. ``None`` or an empty collection skips that call.
    """
    # Flags are 1 only for the exact strings "Skip-Gram" / "Softmax", else 0.
    self.architecture = 1 if config.get("Word2Vec", "arch") == "Skip-Gram" else 0
    self.training_model = 1 if config.get("Word2Vec", "training_model") == "Softmax" else 0
    self.window = int(config.get("Word2Vec", "context_window"))
    self.dimension = int(config.get("Word2Vec", "dimension"))
    self.delimiter = config.get("Word2Vec", "text_delimiter")

    self.file_path = filepath
    self.sentences = []
    self.vectors = None

    # Logger is named "<module>.<class>"; ``__class__`` is the class this
    # method is defined in.
    self.log = logging.getLogger(__name__ + "." + __class__.__name__)
    self.log = logger.add_log_file(self.log, config.get("GENERAL", "logfile"))

    # ``None`` replaces the former mutable ``[]`` default; both are falsy, so
    # sentences are still only loaded when documents were actually supplied.
    if documents:
        self.set_sentences(documents)