"""Capture pipeline entry point: read frames, analyze them, ship the results."""
import datetime

from eye.eye import Eye
from analyzer.analyzer import Analyzer
from sender.sender import Sender
from config.config import Config

# Instantiate the pipeline components once at startup.
configuration = Config()
eye = Eye()
analyzer = Analyzer()
sender = Sender()

# Prime the pipeline with one frame, then run forever:
# analyze the current frame, send the result, grab the next frame.
frame = eye.capture()
while True:
    payload = analyzer.analyze(frame)
    sender.send(payload)
    frame = eye.capture()
class ATC:
    """Front-end for the VINITI Automated Text Classifier.

    With command-line arguments the program runs in batch mode: load the
    input file, classify it, write the result, exit.  Without arguments it
    shows a splash screen and launches the GUI.
    """

    # Config section listing the supported formats/languages/norm options.
    section = "AvailableOptions"

    def __init__(self):
        # Initialising fields
        self.parameters = {}
        # Loading config
        self.config = self.loadConfig()
        self.analyzer = Analyzer(self.config)
        # NOTE(review): exposes the analyzer through a module-level global —
        # presumably for the server subcommand; confirm who reads it.
        global analyzer_global
        analyzer_global = self.analyzer
        # Selecting mode
        if len(sys.argv) > 1:
            # Batch (command-line) mode: parse args, validate the input
            # file, run the analysis, save the result, then exit.
            self.parse_args()
            self.analyzer.error_occurred.connect(self.print_error)
            filename = self.parameters["input"]
            if not os.path.exists(filename):
                self.print_error("File {} does not exist".format(filename))
                sys.exit()
            try:
                text = self.analyzer.load_file(self.parameters["input"])
                if not self.analyzer.isTextValid(text):
                    self.print_error(
                        "File {} does not contain valid text".format(filename))
                    sys.exit()
            except Exception as e:
                self.print_error("Error loading file {}:\n{}".format(
                    filename, e))
                sys.exit()
            result = self.analyzer.analyze(text, self.parameters)
            if result is None:
                self.print_error("Unknown error occurred")
                sys.exit()
            # Persist predictions above the probability threshold.
            result.save_to_file(self.parameters["output"],
                                self.parameters["threshold"],
                                n_digits=5)
            sys.exit(0)
        else:
            # GUI mode.
            show_splashscreen()
            self.ui = GUI(analyzer=self.analyzer, config=self.config)

    def parse_args(self):
        """Parse command-line arguments into ``self.parameters``.

        Option choices (formats, languages, normalization modes) come from
        the loaded config.  The ``server`` subcommand launches server mode
        via the ``LaunchServer`` argparse action.
        """
        description = "Automated Text Classifier for VINITI. \n" \
                      "Чтобы запустить графический сеанс, " \
                      "запустите программу без аргументов"
        argparser = ArgumentParser(prog="ATC", description=description)
        formats = self.config.get(self.section, "formats").split(", ")
        languages = self.config.get(self.section, "languages").split(", ")
        norm_options = self.config.get(self.section,
                                       "norm_predict").split(", ")
        argparser.add_argument("-i", "--input",
                               help="полный путь к файлу с текстом",
                               required=True)  # type=unescaped_str
        argparser.add_argument(
            "-o", "--output",
            help="полный путь к файлу, в который будет записан результат",
            required=True)
        argparser.add_argument("-id", "--rubricator-id",
                               help="идентификатор рубрикатора",
                               required=True)
        argparser.add_argument("-f", "--format",
                               help="формат входного файла",
                               choices=formats,
                               required=False)
        argparser.add_argument("-l", "--language",
                               help="язык входного текста",
                               choices=languages,
                               required=True)
        argparser.add_argument("-t", "--threshold",
                               help="пороговое значение вероятности. " +
                                    "Ответы классификатора с вероятностью ниже " +
                                    "заданной выведены не будут",
                               default=0.0,
                               type=float,
                               required=False)
        argparser.add_argument(
            "-n", "--normalize",
            help="нормировать ли предсказание классификатора",
            choices=norm_options,
            required=False,
            default="not")
        subparsers = argparser.add_subparsers(help="Commands")
        # Creating server command
        server_parser = subparsers.add_parser("server",
                                              help="запустить режим сервера")
        server_parser.add_argument(
            "port",
            help="номер порта, на котором запустить сервер",
            action=LaunchServer,
            type=int)
        self.parameters = vars(argparser.parse_args())

    @staticmethod
    def print_error(error_msg: str):
        """Write *error_msg* to stderr."""
        print(error_msg, file=sys.stderr)

    @staticmethod
    def loadConfig():
        """Read ``config.ini`` next to this module and return the parser."""
        parser = ConfigParser()
        parser.read([os.path.join(os.path.dirname(__file__), "config.ini")],
                    encoding="utf-8")
        return parser
class PlagiarismTest(unittest.TestCase):
    """End-to-end tests for the essay pipeline: similarity grouping,
    plagiarism matrices, lecture reading, and full report generation.

    Fixes over the previous revision: ``assertEqual(expr, True)`` replaced
    with the dedicated assertion methods (better failure messages),
    ``assertEqual(True, False)`` replaced with ``self.fail(...)``, and
    ``assert_essay`` no longer mutates the caller's ``labels`` list.
    """

    def setUp(self):
        # Fresh pipeline objects and fixture data for every test.
        self.analyzer = Analyzer()
        self.supervisor = Supervisor()
        self.test_data = read_test_data()

    def tearDown(self):
        pass

    def test_find_similarity_groups_by_rows(self):
        """Row-based grouping assigns each essay the expected group id."""
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1],
                                        [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3],
                                        [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])
        result = self.analyzer.find_similarity_groups_by_rows(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 3)

    def test_find_similarity_groups_by_mean_group_similarity(self):
        """Mean-group-similarity grouping merges essay 4 into group 1."""
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1],
                                        [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3],
                                        [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])
        result = self.analyzer.find_similarity_groups_by_mean_group_similarity(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 1)

    def test_analyze_test_via_supervisor(self):
        """Supervisor markup produces the expected text statistics."""
        text = self.test_data["observer_test_text"]
        result = self.supervisor.markup(text)
        self.assertEqual(result.text, text)
        self.assertEqual(result.num_letters, 377)
        self.assertEqual(result.num_sentences, 10)
        self.assertEqual(result.num_words, 62)
        self.assertEqual(len(result.tokens), 62)
        self.assertEqual(len(result.morph_tokens), 62)

    def test_create_plagiarism_matrix(self):
        """The plagiarism matrix is square with the expected pairwise scores."""
        essays = self.test_data["plagiarism_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        matrix, coincidences = plagiarism.create_plagiarism_matrix(essays)
        self.assertTrue(
            (matrix.shape == np.array([len(essays), len(essays)])).all())
        # For pole essay number 1
        self.assertEqual(matrix[0, 1], 100)  # overlaps on 100% of sentences
        self.assertTrue(33 < matrix[0, 2] < 65)  # overlaps on 50% of sentences
        self.assertEqual(matrix[0, 3], 0)
        # For pole essay number 2
        self.assertEqual(matrix[3, 0], 0)
        self.assertEqual(matrix[3, 1], 0)  # overlaps on 0% of sentences
        self.assertTrue(35 < matrix[3, 2] < 65)  # overlaps on 50% of sentences

    def test_similarity_matrix(self):
        """The similarity matrix separates same/overlapping/unrelated topics."""
        essays = self.test_data["similarity_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        result = similarity.create_similarity_matrix(essays)
        self.assertTrue(
            (result.shape == np.array([len(essays), len(essays)])).all())
        # For essay number 1
        self.assertGreater(result[0, 1], 13)  # essay on the same topic
        self.assertTrue(4 < result[0, 2] < 10)  # essay on an overlapping topic
        self.assertLess(result[0, 3], 4)  # essay on a separate topic

    def test_read_from_file(self):
        """Reading a presentation file yields the expected plain text."""
        expected_text = self.test_data["lecture_reader_expected_text"].strip()
        presentation_text = lecture_reader.read_from_file(
            TEST_PRESENTATION_PATH).strip()
        self.assertEqual(presentation_text, expected_text)

    def test_analyzer(self):
        """Full analysis produces the expected per-essay grades and labels."""
        lecture = Presentation(TEST_PRESENTATION_PATH)
        lecture_text = read_from_presentation(lecture)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["text"])
        report = self.analyzer.analyze(lecture_text, pd_essays)
        self.assert_lecture(self.test_data["lecture_reader_expected_text"],
                            report.lecture, 176)
        self.assert_essay(essays[0], report.essays[0], GradeType.FAIL, 1,
                          [LabelType.FAIL, LabelType.LECTURE_PLAGIARISM], 302)
        self.assert_essay(essays[1], report.essays[1], GradeType.SUCCESS, 1,
                          [LabelType.SUCCESS], 338)
        self.assert_essay(essays[2], report.essays[2], GradeType.FAIL, 2,
                          [LabelType.FAIL], 246)

    def assert_lecture(self, text, lecture, num_words):
        """Check the lecture section of a report against the source text."""
        self.assertEqual(lecture.text, text)
        self.assertEqual(lecture.statistic.num_letters, len(text))
        self.assertEqual(lecture.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(lecture.statistic.num_words, num_words)

    def assert_essay(self, text, essay, grade, group, labels, num_words):
        """Check one essay entry; *labels* is the exact multiset of expected
        label types (order-insensitive)."""
        self.assertEqual(essay.text, text)
        self.assertEqual(essay.statistic.num_letters, len(text))
        self.assertEqual(essay.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(essay.statistic.num_words, num_words)
        self.assertEqual(essay.grade, grade)
        self.assertEqual(essay.group, group)
        self.assertEqual(len(labels), len(essay.labels))
        # Work on a copy so the caller's list is not mutated.
        expected = list(labels)
        for label in essay.labels:
            if label.type in expected:
                expected.remove(label.type)
            else:
                self.fail(f"Unexpected label type: {label.type}")
        self.assertEqual(len(expected), 0)

    def test_analyzer_incorrect_essay_list(self):
        """A DataFrame without a 'text' column raises NotFoundEssayColumn."""
        lecture = Presentation(TEST_PRESENTATION_PATH)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["roles"])
        self.assertRaises(NotFoundEssayColumn, self.analyzer.analyze, lecture,
                          pd_essays)
class EstimatorServer:
    """Flask + Socket.IO web server: accepts a lecture and a batch of
    essays, runs the analysis, and persists/updates reports via the ORM.

    Fixes over the previous revision: ``print(traceback.print_exc())``
    (which printed ``None``) replaced with ``traceback.print_exc()``;
    ``type(e) == X`` checks replaced with ``isinstance``; in ``end_check``
    the session is created before the ``try`` and always closed, so the
    error path can no longer hit an unbound ``session``; misspelled
    user-facing message corrected.
    """

    def __init__(self, gdrive_certificat_path):
        """Build the app, socket layer, ORM session factory, analyzer, and
        Google Drive client, then register the HTTP routes."""
        self.server = Flask(__name__, static_url_path='',
                            static_folder='frontend/static')
        self.socketio = SocketIO(self.server)
        self.session_maker = run_orm()
        self.analyzer = Analyzer()
        self.drive = Drive(gdrive_certificat_path)
        # Routes are registered programmatically because the handlers are
        # bound methods (decorators are unavailable at class-definition time).
        self.server.route('/', methods=['GET'])(self.index)
        self.server.route('/upload', methods=['POST'])(self.upload_task)
        self.server.route('/end_check', methods=['POST'])(self.end_check)

    def item_callback(self, current_item_id, count_items):
        """Progress hook: tell clients how many archive items are processed."""
        self.socketio.emit(
            'changed-report-status',
            json.dumps({
                'status': 'handling',
                'description': f'Обработано ответов из архива {current_item_id} из {count_items}'
            }))

    def load_lecture_file(self, file):
        """Extract plain text from an uploaded lecture file.

        Supports ``.pptx``, ``.txt`` (UTF-8) and ``.docx``; raises
        ``NotSupportLectureExtensionType`` for anything else.
        """
        extension = re.findall(r'\.\w+$', file.filename)[0]
        if extension == '.pptx':
            lecture = Presentation(file)
            return read_from_presentation(lecture)
        if extension == '.txt':
            wrapper = io.TextIOWrapper(file, encoding='utf-8')
            return wrapper.read()
        if extension == '.docx':
            document = docx.Document(file)
            # Join paragraph texts into a single space-separated string.
            return ' '.join(para.text for para in document.paragraphs)
        raise NotSupportLectureExtensionType(extension)

    def load_essays_file(self, file):
        """Load uploaded essays as a DataFrame.

        Supports ``.csv``, ``.xlsx`` and ``.zip`` (archive downloaded item
        by item, reporting progress through ``item_callback``); raises
        ``NotSupportEssayExtensionType`` for anything else.
        """
        extension = re.findall(r'\.\w+$', file.filename)[0]
        if extension == '.csv':
            return pd.read_csv(file)
        if extension == '.xlsx':
            return pd.read_excel(file)
        if extension == '.zip':
            archive = zipfile.ZipFile(file, 'r')
            return download_archive(self.drive, archive, self.item_callback)
        raise NotSupportEssayExtensionType(extension)

    def index(self):
        """Serve the single-page frontend."""
        return self.server.send_static_file("index.html")

    def upload_task(self):
        """Handle an upload: validate files, analyze essays, store the report.

        Emits status updates over Socket.IO; on failure returns a JSON error
        payload with HTTP 500.
        """
        try:
            if 'lecture' not in request.files or 'essays' not in request.files:
                return redirect(request.url)
            lecture = request.files['lecture']
            essays = request.files['essays']
            if lecture.filename == '' or essays.filename == '':
                return redirect(request.url)
            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Обработка файлов"
                }))
            lecture = self.load_lecture_file(lecture)
            essays = self.load_essays_file(essays)
            essays = essays.dropna(axis=0)
            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Оценка эссе"
                }))
            report = self.analyzer.analyze(lecture, essays)
            report_schema = ReportSchema()
            # NOTE(review): the session stays open so report_schema.dump can
            # still load attributes from the persisted report; it is never
            # explicitly closed — confirm the session factory's lifecycle.
            session = self.session_maker()
            session.add(report)
            session.commit()
            self.socketio.emit('changed-report-status',
                               json.dumps({'status': 'handled'}))
            return report_schema.dump(report)
        except Exception as e:
            print(e)
            traceback.print_exc()
            # Domain errors carry a user-presentable message; anything else
            # gets a generic error text.
            if isinstance(e, (NotFoundEssayColumn,
                              NotSupportEssayExtensionType,
                              NotSupportLectureExtensionType)):
                return json.dumps({"status": "error", "text": str(e)}), 500
            return json.dumps({
                "status": "error",
                "text": "Ошибка оценки загруженных эссе"
            }), 500

    def end_check(self):
        """Persist reviewer corrections posted by the frontend.

        Loads a report from the request JSON into the session, commits it,
        and returns a JSON status payload (HTTP 500 on failure).
        """
        # Created outside the try so the cleanup below never sees an
        # unbound name, even if the factory itself raises.
        session = self.session_maker()
        try:
            report_schema = ReportSchema()
            json_data = json.loads(request.data)
            report = report_schema.load(data=json_data, session=session)
            if report.lecture is None:
                raise Exception("Received incorrect data. Report not found")
            session.commit()
            return json.dumps({
                "status": "success",
                "text": "Результаты проверки эссе успешно сохранены"
            })
        except Exception as e:
            print(e)
            traceback.print_exc()
            return json.dumps({
                "status": "error",
                "text": "Ошибка сохранения результатов проверки"
            }), 500
        finally:
            # Previously only the error path closed the session (a leak on
            # success); nothing after commit touches ORM state, so closing
            # here is safe.
            session.close()

    def start(self):
        """Run the Socket.IO development server on localhost:5000."""
        self.socketio.run(self.server, host='localhost', port=5000)