Ejemplo n.º 1
0
    def createStatistics(self, statisticObject, readerSourceData, normalization, sourceCustomCallback=None):
        """
        Build document statistics for every supported file yielded by the reader.

        Each item produced by ``readerSourceData.getSourceCustom`` is probed with
        ``magic.from_file`` for its MIME type. Items detected as ``MIME_TEXT`` or
        ``MIME_WORD`` are handed to ``statisticObject.makeDocStatisticCustom``
        together with the matching entry of ``self._source``; items of any other
        type are skipped.

        :param statisticObject:  object used to create the statistics (a ``Statistic`` instance)
        :param readerSourceData:  object used to obtain the full paths to the files
        :param normalization:  object used to normalize the data
        :param sourceCustomCallback:  callback for the settings-retrieval object (path verification)
        :raises ParamError:  if ``statisticObject`` or ``readerSourceData`` is falsy
        :raises TypeError:  if an argument is not an instance of its expected class
        """
        if not statisticObject:
            raise ParamError("statisticObject cannot be the None-object")
        if not isinstance(statisticObject, Statistic):
            raise TypeError("statisticObject must be an instance of Statistic")

        if not readerSourceData:
            raise ParamError("readerSourceData cannot be the None-object")
        if not isinstance(readerSourceData, ReaderSourceData):
            raise TypeError("readerSourceData must be an instance of ReaderSourceData")

        # The callback is optional: it is type-checked only when a truthy value is given.
        if sourceCustomCallback and not isinstance(sourceCustomCallback, SourceCustomCallback):
            raise TypeError("sourceCustomCallback must be an instance of SourceCustomCallback")

        encodingDetector = DetectEncoding()
        # One FileSourceCustom holder is reused for all files; only its `custom`
        # attribute is replaced on each iteration.
        fileSourceCustom = FileSourceCustom()
        for itemFile in readerSourceData.getSourceCustom(sourceCustomCallback):
            # The item is decoded with its detected encoding before being passed to magic.
            decodedPath = itemFile.decode(encodingDetector.getEncode(itemFile))
            file_type = magic.from_file(decodedPath, mime=True)
            fileSourceCustom.custom = itemFile

            if file_type == MIME_TEXT:
                source = self._source['text']
            elif file_type == MIME_WORD:
                source = self._source['word']
            else:
                # Unsupported MIME type: nothing to collect for this file.
                continue

            if source:
                statisticObject.makeDocStatisticCustom(source, fileSourceCustom, normalization)
Ejemplo n.º 2
0
 def testGetSourceName(self):
     """
     Check that a name can be obtained for every item the reader yields.

     A ``ReaderNameFS`` is built over the ``resources/first`` and
     ``resources/second`` directories under ``self.__dirPath``. Each yielded
     item is opened through ``FileSource``, its name is asserted to be not
     ``None``, and the opened source is closed again.
     """
     firstPath = os.path.join(self.__dirPath, "resources/first")
     secondPath = os.path.join(self.__dirPath, "resources/second")
     readerNameFS = ReaderNameFS([firstPath, secondPath])
     fileSource = FileSource()
     fileSourceCustom = FileSourceCustom()
     for itemFile in readerNameFS.getSourceCustom():
         fileSourceCustom.custom = itemFile
         openSource = fileSource.openSource(fileSourceCustom.custom)
         try:
             self.assertIsNotNone(fileSource.getName(openSource), "filename is not to be a None object")
         finally:
             # Close the source even when the assertion fails, so a failing
             # test does not leave it open.
             fileSource.closeSource(openSource)
Ejemplo n.º 3
0
 def setUp(self):
     """
     Prepare the per-test fixtures.

     Instantiates the collaborators used by the tests and records the
     absolute path of the current directory in ``self.__dirPath``.
     """
     mongoUtils = MongoSaveUtils(
         HOST, PORT, USR, PWD, DB, FC_N, FC_DN, MDN
     )
     self.__mongoUtils = mongoUtils

     self.__simpleNormal = SimpleNormalization()
     self.__fileSourceCustom = FileSourceCustom()
     self.__fileBlockSource = FileBlockSource()
     self.__calcMongo = CalcMongo()

     # The statistics object is built on the same Mongo utilities instance.
     self.__mongoStatistics = MongoStatistic(mongoUtils)

     currentDir = os.curdir
     self.__dirPath = os.path.abspath(currentDir)