Beispiel #1
0
 def __init__(self, config, distance_metric):
     """Keep the configuration and distance metric and attach a logger.

     Args:
         config: Settings object queried as ``config.get(section, option)``;
             its ``GENERAL``/``logfile`` value is handed to
             ``logger.add_log_file``.
         distance_metric: Stored unchanged on ``self.distance_metric``.
     """
     self.config = config
     self.distance_metric = distance_metric
     # Starts empty; the attribute name (spelling included) is kept because
     # other code may read it.
     self.clustring_results = []

     # Logger is named "<module>.<class>" and then passed through the
     # project's add_log_file helper together with the configured log file.
     logger_name = ".".join((__name__, __class__.__name__))
     self.log = logging.getLogger(logger_name)
     logfile = config.get("GENERAL", "logfile")
     self.log = logger.add_log_file(self.log, logfile)
 def __init__(self, config, documents, external_vec):
     """Record the inputs, read the TF-IDF word limit and attach a logger.

     Args:
         config: Settings object queried as ``config.get(section, option)``;
             supplies ``TF-IDF``/``num_of_words_per_doc`` (converted with
             ``int``) and ``GENERAL``/``logfile``.
         documents: Stored unchanged on ``self.documents``.
         external_vec: Stored unchanged on ``self.external_vec``.
     """
     self.config = config
     self.documents = documents

     words_per_doc = config.get("TF-IDF", "num_of_words_per_doc")
     self.num_of_words_per_doc = int(words_per_doc)

     self.external_vec = external_vec
     self.stage = 1

     # Logger is named "<module>.<class>" before the log file is attached.
     self.log = logging.getLogger(f"{__name__}.{__class__.__name__}")
     logfile = config.get("GENERAL", "logfile")
     self.log = logger.add_log_file(self.log, logfile)

     # ``build_chunks`` is not defined in this method; it is looked up in the
     # surrounding scope and kept on the instance.
     self.build_chunks = build_chunks
 def __init__(self, filepath, config, id):
     """Attach a logger, load the text at ``filepath`` and read chunk settings.

     Args:
         filepath: Path stored on ``self.docPath``; its basename is kept on
             ``self.basename`` and the path is passed to ``self.getText``.
         config: Settings object queried as ``config.get(section, option)``;
             supplies ``GENERAL``/``logfile`` plus ``CHUNKS``/``size`` and
             ``CHUNKS``/``delay`` (both converted with ``int``).
         id: Stored on ``self.docID``.
     """
     # The logger is set up first so it exists before getText runs.
     logger_name = ".".join((__name__, __class__.__name__))
     self.log = logging.getLogger(logger_name)
     logfile = config.get("GENERAL", "logfile")
     self.log = logger.add_log_file(self.log, logfile)

     self.docID = id
     self.docPath = filepath
     self.basename = os.path.basename(filepath)
     self.docText = self.getText(self.docPath)

     self.chunks = []
     size_option = config.get("CHUNKS", "size")
     delay_option = config.get("CHUNKS", "delay")
     self.chunkSize = int(size_option)
     self.DelayPar = int(delay_option)
     self.cluster = None
 def __init__(self, config, documents, number_of_clusters, silhouette_width,
              max_chunks_in_doc):
     """Record one clustering outcome and attach a logger.

     Args:
         config: Settings object queried as ``config.get(section, option)``;
             supplies ``GENERAL``/``logfile``.
         documents: Stored unchanged on ``self.documents``.
         number_of_clusters: Stored on ``self.number_of_chunks_styles``.
         silhouette_width: Rounded to 6 decimal places before being stored.
         max_chunks_in_doc: Stored unchanged on ``self.max_chunks_in_doc``.
     """
     self.config = config
     self.documents = documents
     self.max_chunks_in_doc = max_chunks_in_doc
     self.number_of_chunks_styles = number_of_clusters
     self.silhouette_width = round(silhouette_width, 6)

     # Document-style statistics start unset / zero.
     self.max_docs_in_style = None
     self.min_docs_in_style = None
     self.number_of_documents_styles = 0

     # Logger is named "<module>.<class>" before the log file is attached.
     self.log = logging.getLogger(f"{__name__}.{__class__.__name__}")
     logfile = config.get("GENERAL", "logfile")
     self.log = logger.add_log_file(self.log, logfile)
Beispiel #5
0
    def __init__(self, config, documents):
        """Initialise empty working state for the given documents.

        Args:
            config: Settings object queried as ``config.get(section, option)``;
                supplies ``GENERAL``/``logfile``.
            documents: Stored unchanged on ``self.documents``.
        """
        self.config = config
        self.distance_metric = []
        self.documents = documents
        self.comparable_chunks = []
        self.chunks_index = {}
        self.transferred_vectors = []
        self.chebyshev_mat = []

        # These two are assigned on the Distance_Matric class itself, not on
        # this instance, so every construction replaces the class-level values.
        Distance_Matric.vectors_dict = {}
        Distance_Matric.corr_matrix = None

        # Logger is named "<module>.<class>" before the log file is attached.
        logger_name = ".".join((__name__, __class__.__name__))
        self.log = logging.getLogger(logger_name)
        logfile = config.get("GENERAL", "logfile")
        self.log = logger.add_log_file(self.log, logfile)
 def __init__(self, config, text, docID, chunkID, model, preChunks=None):
     """Set up one chunk and immediately build its vector.

     Args:
         config: Settings object queried as ``config.get(section, option)``;
             supplies ``CHUNKS``/``size``, ``CHUNKS``/``delay`` and
             ``GENERAL``/``logfile``.
         text: Stored on ``self.Doc``.
         docID: Stored on ``self.docID``.
         chunkID: Stored on ``self.chunkID``.
         model: Stored on ``self.model``.
         preChunks: Optional list stored on ``self.preChunks``. When omitted
             (or ``None``) each instance gets its own new empty list.
     """
     # NOTE(review): both values are kept exactly as ``config.get`` returns
     # them; no ``int`` conversion happens here - confirm consumers expect that.
     self.chunk_size = config.get("CHUNKS", "size")
     self.delay = config.get("CHUNKS", "delay")

     # Logger is named "<module>.<class>" before the log file is attached.
     self.log = logging.getLogger(__name__ + "." + __class__.__name__)
     self.log = logger.add_log_file(self.log,
                                    config.get("GENERAL", "logfile"))

     self.chunkID = chunkID
     self.chunkVec = None
     self.Doc = text
     self.docID = docID
     self.model = model
     self.cluster = None
     # A ``[]`` default in the signature is created once and would be shared
     # by every instance built without ``preChunks``; allocate per instance.
     self.preChunks = [] if preChunks is None else preChunks
     self.ranked_vec = {}

     # Called through the class, so this always runs Chunk's own createVec.
     Chunk.createVec(self)
Beispiel #7
0
 def __init__(self, config, filepath=None, documents=None):
     """Read the Word2Vec settings and optionally load sentences.

     Args:
         config: Settings object queried as ``config.get(section, option)``;
             supplies the ``Word2Vec`` options ``arch``, ``training_model``,
             ``context_window``, ``dimension`` and ``text_delimiter`` plus
             ``GENERAL``/``logfile``.
         filepath: Stored on ``self.file_path``; not otherwise used here.
         documents: Optional documents. When truthy they are passed to
             ``self.set_sentences``; ``None`` or an empty collection skips
             that call.
     """
     # Both options are reduced to 0/1 flags: 1 only on an exact match.
     self.architecture = 1 if config.get("Word2Vec", "arch") == "Skip-Gram" else 0
     self.training_model = (
         1 if config.get("Word2Vec", "training_model") == "Softmax" else 0
     )
     self.window = int(config.get("Word2Vec", "context_window"))
     self.dimension = int(config.get("Word2Vec", "dimension"))
     self.delimiter = config.get("Word2Vec", "text_delimiter")
     self.file_path = filepath
     self.sentences = []
     self.vectors = None

     # Logger is named "<module>.<class>" before the log file is attached.
     self.log = logging.getLogger(__name__ + "." + __class__.__name__)
     self.log = logger.add_log_file(self.log,
                                    config.get("GENERAL", "logfile"))

     # ``None`` replaces the former ``[]`` default; both are falsy, so the
     # behaviour for callers that omit ``documents`` is unchanged.
     if documents:
         self.set_sentences(documents)