# Beispiel #1 (example separator from paste)
import datetime

from eye.eye import Eye
from analyzer.analyzer import Analyzer
from sender.sender import Sender
from config.config import Config

# Wire up the pipeline components once at startup.
configuration = Config()

eye = Eye()
analyzer = Analyzer()
sender = Sender()

# Main pipeline: capture a frame, analyze it, ship the result — forever.
while True:
    frame = eye.capture()
    sender.send(analyzer.analyze(frame))
# Beispiel #2 (example separator from paste)
class ATC:
    """Entry point for the Automated Text Classifier (ATC).

    When command-line arguments are supplied, performs a one-shot batch
    classification of the given input file and exits; otherwise launches
    the graphical user interface.
    """

    # Name of the config section listing the available CLI option values.
    section = "AvailableOptions"

    def __init__(self):
        # Initialising fields
        self.parameters = {}
        # Loading config
        self.config = self.loadConfig()
        self.analyzer = Analyzer(self.config)
        # Publish the analyzer for module-level consumers.
        global analyzer_global
        analyzer_global = self.analyzer
        # Selecting mode: CLI batch run when arguments are present, GUI otherwise.
        if len(sys.argv) > 1:
            self.parse_args()
            self.analyzer.error_occurred.connect(self.print_error)
            filename = self.parameters["input"]
            if not os.path.exists(filename):
                self.print_error("File {} does not exist".format(filename))
                # BUGFIX: was sys.exit() — exited with status 0 on an error path.
                sys.exit(1)
            try:
                text = self.analyzer.load_file(self.parameters["input"])
                if not self.analyzer.isTextValid(text):
                    self.print_error(
                        "File {} does not contain valid text".format(filename))
                    # BUGFIX: non-zero exit status on invalid input.
                    # (SystemExit derives from BaseException, so the
                    # `except Exception` below does not swallow it.)
                    sys.exit(1)
            except Exception as e:
                self.print_error("Error loading file {}:\n{}".format(
                    filename, e))
                # BUGFIX: non-zero exit status on load failure.
                sys.exit(1)
            result = self.analyzer.analyze(text, self.parameters)
            if result is None:
                self.print_error("Unknown error occurred")
                # BUGFIX: non-zero exit status on analysis failure.
                sys.exit(1)
            result.save_to_file(self.parameters["output"],
                                self.parameters["threshold"],
                                n_digits=5)
            sys.exit(0)
        else:
            show_splashscreen()
            self.ui = GUI(analyzer=self.analyzer, config=self.config)

    def parse_args(self):
        """Parse command-line arguments into ``self.parameters``.

        The permitted value lists (formats, languages, normalization modes)
        are read from the config section named by ``self.section``.
        """
        description = "Automated Text Classifier for VINITI. Чтобы запустить графический сеанс, " \
                      "запустите программу без аргументов"
        argparser = ArgumentParser(prog="ATC", description=description)
        formats = self.config.get(self.section, "formats").split(", ")
        languages = self.config.get(self.section, "languages").split(", ")
        norm_options = self.config.get(self.section,
                                       "norm_predict").split(", ")
        argparser.add_argument("-i",
                               "--input",
                               help="полный путь к файлу с текстом",
                               required=True)
        # type=unescaped_str
        argparser.add_argument(
            "-o",
            "--output",
            help="полный путь к файлу, в который будет записан результат",
            required=True)
        argparser.add_argument("-id",
                               "--rubricator-id",
                               help="идентификатор рубрикатора",
                               required=True)
        argparser.add_argument("-f",
                               "--format",
                               help="формат входного файла",
                               choices=formats,
                               required=False)
        argparser.add_argument("-l",
                               "--language",
                               help="язык входного текста",
                               choices=languages,
                               required=True)
        argparser.add_argument("-t",
                               "--threshold",
                               help="пороговое значение вероятности. " +
                               "Ответы классификатора с вероятностью ниже " +
                               "заданной выведены не будут",
                               default=0.0,
                               type=float,
                               required=False)
        argparser.add_argument(
            "-n",
            "--normalize",
            help="нормировать ли предсказание классификатора",
            choices=norm_options,
            required=False,
            default="not")
        subparsers = argparser.add_subparsers(help="Commands")
        # Creating server command
        server_parser = subparsers.add_parser("server",
                                              help="запустить режим сервера")
        # NOTE(review): the LaunchServer action launches the server as a side
        # effect of argument parsing — confirm this is intentional.
        server_parser.add_argument(
            "port",
            help="номер порта, на котором запустить сервер",
            action=LaunchServer,
            type=int)

        self.parameters = vars(argparser.parse_args())

    @staticmethod
    def print_error(error_msg: str) -> None:
        """Print *error_msg* to stderr."""
        print(error_msg, file=sys.stderr)

    @staticmethod
    def loadConfig() -> ConfigParser:
        """Read ``config.ini`` located next to this module and return the parser."""
        parser = ConfigParser()
        parser.read([os.path.join(os.path.dirname(__file__), "config.ini")],
                    encoding="utf-8")
        return parser
# Beispiel #3 (example separator from paste)
class PlagiarismTest(unittest.TestCase):
    """Integration tests for the plagiarism / similarity analysis pipeline."""

    def setUp(self):
        self.analyzer = Analyzer()
        self.supervisor = Supervisor()
        self.test_data = read_test_data()

    def tearDown(self):
        pass

    def test_find_similarity_groups_by_rows(self):
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1], [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3], [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])

        result = self.analyzer.find_similarity_groups_by_rows(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 3)

    def test_find_similarity_groups_by_mean_group_similarity(self):
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1], [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3], [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])
        result = self.analyzer.find_similarity_groups_by_mean_group_similarity(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 1)

    def test_analyze_test_via_supervisor(self):
        text = self.test_data["observer_test_text"]
        result = self.supervisor.markup(text)
        self.assertEqual(result.text, text)
        self.assertEqual(result.num_letters, 377)
        self.assertEqual(result.num_sentences, 10)
        self.assertEqual(result.num_words, 62)
        self.assertEqual(len(result.tokens), 62)
        self.assertEqual(len(result.morph_tokens), 62)

    def test_create_plagiarism_matrix(self):
        essays = self.test_data["plagiarism_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        matrix, coincidences = plagiarism.create_plagiarism_matrix(essays)

        # IDIOM: assertEqual on the shape tuple instead of
        # assertEqual((shape == np.array(...)).all(), True).
        self.assertEqual(matrix.shape, (len(essays), len(essays)))
        # For reference essay #1
        # Essay matches 100% of sentences.
        self.assertTrue(matrix[0, 1] == 100)
        # Essay matches ~50% of sentences.
        self.assertTrue(33 < matrix[0, 2] < 65)
        self.assertTrue(matrix[0, 3] == 0)

        # For reference essay #2
        self.assertTrue(matrix[3, 0] == 0)
        # Essay matches 0% of sentences.
        self.assertTrue(matrix[3, 1] == 0)
        # Essay matches ~50% of sentences.
        self.assertTrue(35 < matrix[3, 2] < 65)

    def test_similarity_matrix(self):
        essays = self.test_data["similarity_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        result = similarity.create_similarity_matrix(essays)

        self.assertEqual(result.shape, (len(essays), len(essays)))
        # For essay #1
        self.assertTrue(result[0, 1] > 13)  # essay on the same topic
        self.assertTrue(4 < result[0, 2] < 10)  # essay on an overlapping topic
        self.assertTrue(result[0, 3] < 4)  # essay on an unrelated topic

    def test_read_from_file(self):
        expected_text = self.test_data["lecture_reader_expected_text"].strip()
        presentation_text = lecture_reader.read_from_file(
            TEST_PRESENTATION_PATH).strip()
        self.assertEqual(presentation_text, expected_text)

    def test_analyzer(self):
        lecture = Presentation(TEST_PRESENTATION_PATH)
        lecture_text = read_from_presentation(lecture)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["text"])
        report = self.analyzer.analyze(lecture_text, pd_essays)
        self.assert_lecture(self.test_data["lecture_reader_expected_text"],
                            report.lecture, 176)
        self.assert_essay(essays[0], report.essays[0], GradeType.FAIL, 1,
                          [LabelType.FAIL, LabelType.LECTURE_PLAGIARISM], 302)
        self.assert_essay(essays[1], report.essays[1], GradeType.SUCCESS, 1,
                          [LabelType.SUCCESS], 338)
        self.assert_essay(essays[2], report.essays[2], GradeType.FAIL, 2,
                          [LabelType.FAIL], 246)

    def assert_lecture(self, text, lecture, num_words):
        """Assert the lecture's text and statistic fields match *text*."""
        self.assertEqual(lecture.text, text)
        self.assertEqual(lecture.statistic.num_letters, len(text))
        self.assertEqual(lecture.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(lecture.statistic.num_words, num_words)

    def assert_essay(self, text, essay, grade, group, labels, num_words):
        """Assert the essay's fields and that ``essay.labels`` matches
        *labels* exactly as a multiset. The input list is not mutated."""
        self.assertEqual(essay.text, text)
        self.assertEqual(essay.statistic.num_letters, len(text))
        self.assertEqual(essay.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(essay.statistic.num_words, num_words)
        self.assertEqual(essay.grade, grade)
        self.assertEqual(essay.group, group)
        self.assertEqual(len(labels), len(essay.labels))
        # BUGFIX: work on a copy — the original removed items from the
        # caller's *labels* argument in place.
        expected = list(labels)
        for label in essay.labels:
            if label.type in expected:
                expected.remove(label.type)
            else:
                # IDIOM: self.fail instead of assertEqual(True, False).
                self.fail("unexpected label: {}".format(label.type))

        self.assertEqual(len(expected), 0)

    def test_analyzer_incorrect_essay_list(self):
        lecture = Presentation(TEST_PRESENTATION_PATH)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["roles"])
        self.assertRaises(NotFoundEssayColumn, self.analyzer.analyze, lecture,
                          pd_essays)
# Beispiel #4 (example separator from paste)
class EstimatorServer():
    """Flask + SocketIO web server: accepts lecture/essay uploads, runs the
    plagiarism analyzer on them and persists the resulting report."""

    def __init__(self, gdrive_certificat_path):
        self.server = Flask(__name__,
                            static_url_path='',
                            static_folder='frontend/static')

        self.socketio = SocketIO(self.server)
        self.session_maker = run_orm()
        self.analyzer = Analyzer()
        self.drive = Drive(gdrive_certificat_path)

        # Register the HTTP routes on the bound methods.
        self.server.route('/', methods=['GET'])(self.index)
        self.server.route('/upload', methods=['POST'])(self.upload_task)
        self.server.route('/end_check', methods=['POST'])(self.end_check)

    def item_callback(self, current_item_id, count_items):
        """Push archive-download progress to the client over SocketIO."""
        self.socketio.emit(
            'changed-report-status',
            json.dumps({
                'status':
                'handling',
                'description':
                f'Обработано ответов из архива {current_item_id} из {count_items}'
            }))

    def load_lecture_file(self, file):
        """Extract lecture text from an uploaded .pptx/.txt/.docx file.

        Raises NotSupportLectureExtensionType for any other extension.
        """
        extension = re.findall(r'\.\w+$', file.filename)[0]

        if extension == '.pptx':
            lecture = Presentation(file)
            return read_from_presentation(lecture)
        if extension == '.txt':
            wrapper = io.TextIOWrapper(file, encoding='utf-8')
            return wrapper.read()
        if extension == '.docx':
            lecture = docx.Document(file)
            # IDIOM: join paragraph texts directly instead of an append loop.
            return ' '.join(para.text for para in lecture.paragraphs)

        raise NotSupportLectureExtensionType(extension)

    def load_essays_file(self, file):
        """Load uploaded essays as a DataFrame from .csv/.xlsx, or download
        them via Google Drive from a .zip archive of answers.

        Raises NotSupportEssayExtensionType for any other extension.
        """
        extension = re.findall(r'\.\w+$', file.filename)[0]

        if extension == '.csv':
            return pd.read_csv(file)
        if extension == '.xlsx':
            return pd.read_excel(file)
        if extension == '.zip':
            archive = zipfile.ZipFile(file, 'r')
            return download_archive(self.drive, archive, self.item_callback)

        raise NotSupportEssayExtensionType(extension)

    def index(self):
        """Serve the single-page frontend."""
        return self.server.send_static_file("index.html")

    def upload_task(self):
        """Handle POST /upload: parse the lecture and essays files, run the
        analysis, persist the report and return its JSON representation."""
        try:
            if 'lecture' not in request.files or 'essays' not in request.files:
                return redirect(request.url)

            lecture = request.files['lecture']
            essays = request.files['essays']

            if lecture.filename == '' or essays.filename == '':
                return redirect(request.url)

            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Обработка файлов"
                }))

            lecture = self.load_lecture_file(lecture)
            essays = self.load_essays_file(essays)
            # Drop rows with missing values before analysis.
            essays = essays.dropna(axis=0)

            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Оценка эссе"
                }))

            report = self.analyzer.analyze(lecture, essays)
            report_schema = ReportSchema()
            session = self.session_maker()
            session.add(report)
            session.commit()

            self.socketio.emit('changed-report-status',
                               json.dumps({'status': 'handled'}))
            # NOTE(review): the session is left open so the dumped report
            # stays attached — confirm its lifecycle is handled elsewhere.
            return report_schema.dump(report)

        except Exception as e:
            print(e)
            # BUGFIX: traceback.print_exc() prints and returns None — the
            # original wrapped it in print(), emitting a stray "None" line.
            traceback.print_exc()

            # BUGFIX: isinstance() instead of type(e) == ... comparisons.
            if isinstance(e, (NotFoundEssayColumn,
                              NotSupportEssayExtensionType,
                              NotSupportLectureExtensionType)):
                return json.dumps({"status": "error", "text": str(e)}), 500
            else:
                return json.dumps({
                    "status": "error",
                    "text": "Ошибка оценки загруженных эссе"
                }), 500

    def end_check(self):
        """Handle POST /end_check: load the edited report from the request
        body and commit the changes to the database."""
        # BUGFIX: bind the name before the try block — if session_maker()
        # raised, the original's except clause hit a NameError on
        # session.close(), masking the real error.
        session = None
        try:
            session = self.session_maker()
            report_schema = ReportSchema()
            json_data = json.loads(request.data)
            report = report_schema.load(data=json_data, session=session)

            if report.lecture is None:
                raise Exception("Received incorrect data. Report not found")

            session.commit()
            return json.dumps({
                "status":
                "success",
                "text":
                "Результаты проверки эссе успешно сохранены"
            })

        except Exception as e:
            print(e)
            # BUGFIX: call traceback.print_exc() directly (it returns None).
            traceback.print_exc()
            if session is not None:
                session.close()
            return json.dumps({
                "status": "error",
                "text": "Ошибка сохранения резултатов проверки"
            }), 500

    def start(self):
        """Run the SocketIO development server on localhost:5000."""
        self.socketio.run(self.server, host='localhost', port=5000)