def main():
    c: Configuration = Configuration()
    # set configuration params
    c.simulator_write_debug_level = DebugLevel.HIGH
    main_runner = MainRunner(c)
    analyzer = Analyzer(main_runner.main_services)
    analyzer.analyze_algorithms()

def main():
    opt = OptionParser()
    opt.parser.add_argument('target', help='target file path', type=str)
    opt_args = opt.get_commandline_arguments()
    LOG.reset_logger(opt_args)
    target = opt_args.target
    person_names = []
    is_debug = opt_args.debug
    analyzer = Analyzer()
    analyzer.execute(target, person_names, is_debug)
    print('>> FINISHED!!')
    return 0

def analyze_movie(file):
    analyzer = Analyzer(file)
    video = Video.objects.create(file=file)
    for t, frame in enumerate(analyzer.get_histogram()):
        r, g, b = frame
        print("Frame:" + str(t))
        for num_bin in range(0, len(r)):
            Histogram.objects.create(video=video, t=t, num_bin=num_bin,
                                     bin_value=r[num_bin], color=Histogram.RED)
            Histogram.objects.create(video=video, t=t, num_bin=num_bin,
                                     bin_value=g[num_bin], color=Histogram.GREEN)
            Histogram.objects.create(video=video, t=t, num_bin=num_bin,
                                     bin_value=b[num_bin], color=Histogram.BLUE)
    print("Finished")

def run(self):
    if self.main_services.settings.generator:
        Generator.main(self)
    elif self.main_services.settings.trainer:
        Trainer.main(self)
    elif self.main_services.settings.analyzer:
        Analyzer.main(self)
    elif self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
    if self.main_services.settings.clear_cache:
        self.main_services.resources.cache_dir.clear()

def __init__(self, gdrive_certificat_path):
    self.server = Flask(__name__, static_url_path='',
                        static_folder='frontend/static')
    self.socketio = SocketIO(self.server)
    self.session_maker = run_orm()
    self.analyzer = Analyzer()
    self.drive = Drive(gdrive_certificat_path)
    self.server.route('/', methods=['GET'])(self.index)
    self.server.route('/upload', methods=['POST'])(self.upload_task)
    self.server.route('/end_check', methods=['POST'])(self.end_check)

def main(argv):
    print("#" * 64)
    print("##" + " " * 23 + "KICKASS DISASS" + " " * 23 + "##")
    print("##" + " " * 60 + "##")
    print("##" + "Powered by Capstone {:d}.{:d} ##".format(
        CS_API_MAJOR, CS_API_MINOR).rjust(62))
    print("#" * 64 + "\n")
    if len(argv) <= 1:
        raise Exception("Usage: python3 main.py binary")
    loaders = load_modules()
    # open file and check for loaders that are able to disass the file
    with open(argv[1], 'rb') as f:
        valid_loaders = [l for l in loaders if l.accept_file(f)]
        if not valid_loaders:
            print("+ No loaders recognize the file")
            sys.exit(-1)
        if len(valid_loaders) >= 2:
            loader = prompt_user_for_loader(valid_loaders)
        else:
            loader = valid_loaders[0]
        # launch the actual analysis
        anal = Analyzer(loader, f)

def _analyze_and_output(self, src: Story, person_names: list,
                        is_debug: bool) -> ResultData:
    # serialize and compile as text
    mode = CompileMode.NOVEL_TEXT
    fmode = FormatMode.DEFAULT
    LOG.info('Serialize for Analyzer')
    result = assertion.is_instance(Serializer().execute(src, mode), ResultData)
    if not result.is_succeeded:
        return result
    tmp = assertion.is_instance(result.data, CodeList)
    LOG.info('Validate for Analyzer')
    result = assertion.is_instance(Validater().execute(tmp), ResultData)
    if not result.is_succeeded:
        return result
    tmp = assertion.is_instance(result.data, CodeList)
    LOG.info('Compile for Analyzer')
    result = assertion.is_instance(
        Compiler().execute(tmp, mode, {}, False, False), ResultData)
    if not result.is_succeeded:
        return result
    tmp = assertion.is_instance(result.data, RawData)
    LOG.info('Format for Analyzer')
    result = assertion.is_instance(Formatter().execute(tmp, fmode), ResultData)
    if not result.is_succeeded:
        return result
    tmp = assertion.is_instance(result.data, TextList)
    LOG.info('RUN: call Analyzer')
    result = Analyzer().execute(tmp, person_names, is_debug)
    return ResultData([], True, None)

def _test(_dir):
    sample_names = get_sample_names(_dir)
    monitor_data = []
    first_http_response = None
    for sample_name in sample_names:
        http_response = HttpResponse(open("%s/%s" % (_dir, sample_name)).read())
        if first_http_response is None:
            first_http_response = http_response
        parsed_data = parser.parse_ms_monitor_result(http_response.body)
        parsed_data["sample_time"] = datetime.fromtimestamp(
            int(sample_name.split(".")[0]))
        monitor_data.append(parsed_data)
    az = Analyzer()
    result = az.do_analyze(monitor_data[0]["sample_time"],
                           first_http_response, monitor_data[1:])
    return result

def test_00():
    url = "http://10.33.0.57/tag_live_monitor/tvie/zhso/xfw"
    stream_id = 0
    _map = {}
    analyzer = Analyzer()
    http_client = MediaServerAdminQueryer(analyzer, stream_id, url, _map)
    asyncore.loop(map=_map)
    print(http_client.response.body)

def _test(_dir):
    sample_names = get_sample_names(_dir)
    monitor_data = []
    first_http_response = None
    for sample_name in sample_names:
        http_response = HttpResponse(
            open("%s/%s" % (_dir, sample_name)).read())
        if first_http_response is None:
            first_http_response = http_response
        parsed_data = parser.parse_ms_monitor_result(http_response.body)
        parsed_data['sample_time'] = datetime.fromtimestamp(
            int(sample_name.split('.')[0]))
        monitor_data.append(parsed_data)
    az = Analyzer()
    result = az.do_analyze(monitor_data[0]['sample_time'],
                           first_http_response, monitor_data[1:])
    return result

def __init__(self):
    # Initialising fields
    self.parameters = {}
    # Loading config
    self.config = self.loadConfig()
    self.analyzer = Analyzer(self.config)
    global analyzer_global
    analyzer_global = self.analyzer
    # Selecting mode
    if len(sys.argv) > 1:
        self.parse_args()
        self.analyzer.error_occurred.connect(self.print_error)
        filename = self.parameters["input"]
        if not os.path.exists(filename):
            self.print_error("File {} does not exist".format(filename))
            sys.exit()
        try:
            text = self.analyzer.load_file(self.parameters["input"])
            if not self.analyzer.isTextValid(text):
                self.print_error(
                    "File {} does not contain valid text".format(filename))
                sys.exit()
        except Exception as e:
            self.print_error("Error loading file {}:\n{}".format(filename, e))
            sys.exit()
        result = self.analyzer.analyze(text, self.parameters)
        if result is None:
            self.print_error("Unknown error occurred")
            sys.exit()
        result.save_to_file(self.parameters["output"],
                            self.parameters["threshold"], n_digits=5)
        sys.exit(0)
    else:
        show_splashscreen()
        self.ui = GUI(analyzer=self.analyzer, config=self.config)

def main(argv):
    if len(argv) < 2:
        raise Exception("Usage: python3 main.py filename (mode)")
    loaders = load_modules()
    # open file and check for loaders that are able to disass the file
    with open(argv[1], 'rb') as f:
        valid_loaders = [l for l in loaders if l.accept_file(f)]
        # TODO check whether elf or not
        exe = ELFExecutable(argv[1])
        exe._extract_symbol_table()
        if not valid_loaders:
            print("+ No loaders recognize the file")
            sys.exit(-1)
        if len(valid_loaders) >= 2:
            loader = prompt_user_for_loader(valid_loaders)
        else:
            loader = valid_loaders[0]
        fname = os.path.basename(f.name)
        if len(argv) >= 3:
            os.system("python3 init_main.py " + fname + " disasm_all_mode")
        else:
            os.system("python3 init_main.py " + fname)
        if not os.path.isdir("input/" + fname):
            print("Input file(input/" + fname + ") does not exist")
            sys.exit()
        """
        print("#" * 64)
        print("##" + " " * 23 + "KICKASS DISASS" + " " * 23 + "##")
        print("##" + " " * 60 + "##")
        print("##" + "Powered by Capstone {:d}.{:d} ##".format(
            CS_API_MAJOR, CS_API_MINOR).rjust(62))
        print("#" * 64 + "\n")
        """
        if len(argv) >= 3:
            anal = Analyzer(loader, exe, f, argv[2])
        else:
            anal = Analyzer(loader, exe, f)
        anal.start()

class MainApp(Tk):
    # analyzer is completely isolated from UI, check the '/analyzer' module
    analyzer = Analyzer()

    def __init__(self):
        super(MainApp, self).__init__()
        self.setup()
        self.top_frame = TopFrame(self)
        self.body_frame = BodyFrame(self)

    def setup(self):
        self.title('GitHub User Performance Analyzer')
        self.configure(bg='white')
        self.minsize(0, 500)
        self.resizable(0, 0)
        self.grid_columnconfigure(0, weight=1)
        self.grid_rowconfigure(1, weight=1)

    @staticmethod
    def confirm(question, icon='question'):
        result = messagebox.askquestion("Confirmation", question, icon=icon)
        return result == 'yes'

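A minimal launch sketch for the Tkinter app above. The `__main__` guard and the `mainloop()` call are the usual Tkinter entry point and are assumptions here, not part of the original snippet.

# Hypothetical entry point: build the app (Analyzer() is created as a class
# attribute when MainApp is defined) and hand control to the Tk event loop.
if __name__ == '__main__':
    app = MainApp()
    app.mainloop()
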
def setUp(self):
    self.analyzer = Analyzer()
    self.supervisor = Supervisor()
    self.test_data = read_test_data()

class PlagiarismTest(unittest.TestCase):
    def setUp(self):
        self.analyzer = Analyzer()
        self.supervisor = Supervisor()
        self.test_data = read_test_data()

    def tearDown(self):
        pass

    def test_find_similarity_groups_by_rows(self):
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1],
                                        [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3],
                                        [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])
        result = self.analyzer.find_similarity_groups_by_rows(similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 3)

    def test_find_similarity_groups_by_mean_group_similarity(self):
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1],
                                        [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3],
                                        [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])
        result = self.analyzer.find_similarity_groups_by_mean_group_similarity(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 1)

    def test_analyze_test_via_supervisor(self):
        text = self.test_data["observer_test_text"]
        result = self.supervisor.markup(text)
        self.assertEqual(result.text, text)
        self.assertEqual(result.num_letters, 377)
        self.assertEqual(result.num_sentences, 10)
        self.assertEqual(result.num_words, 62)
        self.assertEqual(len(result.tokens), 62)
        self.assertEqual(len(result.morph_tokens), 62)

    def test_create_plagiarism_matrix(self):
        essays = self.test_data["plagiarism_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        matrix, coincidences = plagiarism.create_plagiarism_matrix(essays)
        self.assertEqual(
            (matrix.shape == np.array([len(essays), len(essays)])).all(), True)
        # for pole essay number 1
        self.assertEqual(matrix[0, 1] == 100, True)  # the essay matches on 100% of sentences
        self.assertEqual(matrix[0, 2] > 33 and matrix[0, 2] < 65, True)  # the essay matches on about 50% of sentences
        self.assertEqual(matrix[0, 3] == 0, True)
        # for pole essay number 2
        self.assertEqual(matrix[3, 0] == 0, True)
        self.assertEqual(matrix[3, 1] == 0, True)  # the essay matches on 0% of sentences
        self.assertEqual(matrix[3, 2] > 35 and matrix[3, 2] < 65, True)  # the essay matches on about 50% of sentences

    def test_similarity_matrix(self):
        essays = self.test_data["similarity_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        result = similarity.create_similarity_matrix(essays)
        self.assertEqual(
            (result.shape == np.array([len(essays), len(essays)])).all(), True)
        # for essay number 1
        self.assertEqual(result[0, 1] > 13, True)  # an essay on the same topic
        self.assertEqual(result[0, 2] > 4 and result[0, 2] < 10, True)  # an essay on an overlapping topic
        self.assertEqual(result[0, 3] < 4, True)  # an essay on a separate topic

    def test_read_from_file(self):
        expected_text = self.test_data["lecture_reader_expected_text"].strip()
        presentation_text = lecture_reader.read_from_file(
            TEST_PRESENTATION_PATH).strip()
        self.assertEqual(presentation_text, expected_text)

    def test_analyzer(self):
        lecture = Presentation(TEST_PRESENTATION_PATH)
        lecture_text = read_from_presentation(lecture)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["text"])
        report = self.analyzer.analyze(lecture_text, pd_essays)
        self.assert_lecture(self.test_data["lecture_reader_expected_text"],
                            report.lecture, 176)
        self.assert_essay(essays[0], report.essays[0], GradeType.FAIL, 1,
                          [LabelType.FAIL, LabelType.LECTURE_PLAGIARISM], 302)
        self.assert_essay(essays[1], report.essays[1], GradeType.SUCCESS, 1,
                          [LabelType.SUCCESS], 338)
        self.assert_essay(essays[2], report.essays[2], GradeType.FAIL, 2,
                          [LabelType.FAIL], 246)

    def assert_lecture(self, text, lecture, num_words):
        self.assertEqual(lecture.text, text)
        self.assertEqual(lecture.statistic.num_letters, len(text))
        self.assertEqual(lecture.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(lecture.statistic.num_words, num_words)

    def assert_essay(self, text, essay, grade, group, labels, num_words):
        self.assertEqual(essay.text, text)
        self.assertEqual(essay.statistic.num_letters, len(text))
        self.assertEqual(essay.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(essay.statistic.num_words, num_words)
        self.assertEqual(essay.grade, grade)
        self.assertEqual(essay.group, group)
        self.assertEqual(len(labels), len(essay.labels))
        for label in essay.labels:
            if label.type in labels:
                labels.remove(label.type)
            else:
                self.assertEqual(True, False)
        self.assertEqual(len(labels), 0)

    def test_analyzer_incorrect_essay_list(self):
        lecture = Presentation(TEST_PRESENTATION_PATH)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["roles"])
        self.assertRaises(NotFoundEssayColumn, self.analyzer.analyze,
                          lecture, pd_essays)

def __init__(self, strategy_data):
    self.strategy_data = strategy_data
    self.analyzer = Analyzer(self)
    self.email_service = EmailService()

class Strategy:
    def __init__(self, strategy_data):
        self.strategy_data = strategy_data
        self.analyzer = Analyzer(self)
        self.email_service = EmailService()

    def initialize(self, context):
        context.algo_id = self.strategy_data.get('algo_id')
        context.live_trading = self.strategy_data.get('live_trading')
        self.strategy_data.get('initialize')(context)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.initialize()
        else:
            self.email_service.initialize()

    def SendMessage(self, subject, message):
        if self.strategy_data.get('live_trading', False) is True:
            self.email_service.SendMessage(subject, message)

    def handle_data(self, context, data):
        self.strategy_data.get('handle_data')(context, data)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.handle_data(context)

    def analyze(self, context, data):
        print("Analyse method got called")
        self.strategy_data.get('analyze')(context, data)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.finalize()
        else:
            algo_id = self.strategy_data.get('algo_id')
            db_engine = create_engine('sqlite:///{}'.format(
                os.path.join(str(Path.home()), 'algodb.db')))
            prev_date_sql = "select date from prev_run_date where algo_id={}".format(
                algo_id)
            prev_run_date = pd.read_sql(prev_date_sql, db_engine)['date'][0]
            run_date = str(context.datetime.date())
            prev_pos_sql = "select holding_name, quantity, buy_price, last_price from daily_holdings " \
                           "where algo_id={} and date='{}'".format(algo_id, prev_run_date)
            prev_pos = pd.read_sql(prev_pos_sql, db_engine)
            if prev_pos.empty:
                prev_pos_list = []
            else:
                prev_pos_list = list(prev_pos['holding_name'])
                prev_pos.set_index('holding_name', inplace=True)
            curr_positions = context.portfolio.positions.values()
            stock_email_columns = [
                'Holding', 'Shares', 'Buy Price', 'Yest Price', 'Current Price',
                'Dollar Gain Today', 'Pct Gain Today', 'Dollar Gain Net',
                'Pct Gain Net', 'Market Value'
            ]
            stock_email = pd.DataFrame(columns=stock_email_columns)
            sold_list = []
            for position in list(curr_positions):
                if position.sid.symbol in prev_pos_list:
                    if position.amount == 0:
                        sold_list.append(position)
                        continue
                    prev_stock_pos = prev_pos.loc[position.sid.symbol]
                    gain_today = position.last_sale_price - prev_stock_pos['last_price']
                    pct_gain_today = str(
                        round((gain_today / prev_stock_pos['last_price']) * 100, 4)) + ' %'
                    gain_total = position.last_sale_price - position.cost_basis
                    pct_gain_total = str(
                        round((gain_total / position.cost_basis) * 100, 4)) + ' %'
                    stock_email.loc[position.asset] = [
                        position.asset.symbol, position.amount,
                        round(position.cost_basis, 4),
                        prev_stock_pos['last_price'], position.last_sale_price,
                        gain_today, pct_gain_today, gain_total, pct_gain_total,
                        position.amount * position.last_sale_price
                    ]
                else:
                    stock_email.loc[position.asset] = [
                        position.asset.symbol, position.amount,
                        round(position.cost_basis, 4), '-',
                        position.last_sale_price, '-', '-', '-', '-',
                        position.amount * position.last_sale_price
                    ]
            portfolio = context.portfolio
            # stock_email = stock_email.join(pd.DataFrame(portfolio.current_portfolio_weights, columns=['Weightage']))
            # stock_email['Weightage'] = round(stock_email['Weightage'] * 100, 4).astype(str) + ' %'
            port_email = pd.Series([
                round(portfolio.portfolio_value, 4),
                round(portfolio.pnl, 4),
                str(round(portfolio.pnl / (portfolio.portfolio_value - portfolio.pnl), 4)) + ' %',
                round(portfolio.cash, 4),
                round(portfolio.positions_value, 4)
            ], index=[
                'Portfolio Value', 'Net Gain', 'Percent Net Gain',
                'Cash Value', 'Position Value'
            ])
            message = "<p><h3>Holdings Summary</h3></p>" + stock_email.to_html(index=False) \
                + "<p><h3>Portfolio Summary</h3></p>" + pd.DataFrame(port_email).T.to_html(index=False)
            subject = '{} : Daily Summary - {}'.format(
                self.strategy_data.get('algo_name'), run_date)
            self.email_service.SendNotifications(subject, message)
            prev_run_update_sql = "update prev_run_date set date='{}' where algo_id={}".format(
                run_date, algo_id)
            with db_engine.connect() as connection:
                try:
                    for position in list(curr_positions):
                        if position in sold_list:
                            continue
                        insert_holding_sql = "Insert into daily_holdings (date, algo_id, holding_name, quantity, " \
                                             "buy_price, last_price) values ('{}',{},'{}',{},{},{})" \
                            .format(run_date, algo_id, position.sid.symbol,
                                    position.amount, round(position.cost_basis, 4),
                                    position.last_sale_price)
                        connection.execute(insert_holding_sql)
                    connection.execute(prev_run_update_sql)
                except Exception as e:
                    print(e)
        self.strategy_data.get('after_trading_end')(context, data)

    def before_trading_start(self, context, data):
        self.strategy_data.get('before_trading_start')(context, data)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.before_trading_start()

    def run_algorithm(self):
        live_trading = self.strategy_data.get('live_trading', False)
        # If live_trading true, trade with Virtual broker using database prices,
        # else run normal backtest; database prices are updated from master_algo
        kwargs = {
            'start': self.strategy_data.get('start'),
            'end': self.strategy_data.get('end'),
            'initialize': self.initialize,
            'handle_data': self.handle_data,
            'analyze': self.analyze,
            'before_trading_start': self.before_trading_start,
            'bundle': 'quandl',
            'capital_base': self.strategy_data.get('capital_base'),
            'tws_uri': self.strategy_data.get('tws_uri'),
            'live_trading': live_trading
        }
        run_algo_thread = threading.Thread(target=run_algorithm, kwargs=kwargs)
        run_algo_thread.start()
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.show_plot()
            sys.exit(self.analyzer.app.exec_())
        run_algo_thread.join()

# (fragment: tail of the GUI export handler; the enclosing try block opens above this excerpt)
            threshold = self.main_window.opt_bar.threshold.value()
            result.save_to_file(filename=filename,
                                threshold=threshold,
                                n_digits=5)
        except Exception as e:
            self.error_occurred.emit("Не удалось экспортировать результат")

    def on_analysis_completed(self, result: Predict):
        params = self.main_window.opt_bar.options_to_dict()
        self.main_window.result_widget.show_output(result, params)


def show_splashscreen():
    splash = qw.QSplashScreen(QPixmap(":/Splash_email_v2.png"),
                              qc.Qt.WindowStaysOnTopHint)
    time = qc.QTime()
    splash.show()
    time.start()
    while time.elapsed() <= 3000:
        pass
    splash.finish(None)


if __name__ == '__main__':
    from PyQt5.QtWidgets import QApplication
    import sys
    from configparser import ConfigParser

    a = QApplication(sys.argv)
    c = ConfigParser()
    c.read("../config.ini")
    g = GUI(config=c, analyzer=Analyzer(c))
    a.exec()

def run(argv):
    if argv == 'downloader':
        Downloader.run()
    elif argv == 'analyzer':
        Analyzer().run()

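A small sketch of how the dispatcher above might be driven from the command line; the `sys.argv` handling and the default mode are illustrative assumptions, not part of the original snippet.

import sys

# Hypothetical CLI wrapper: forward the first argument ('downloader' or 'analyzer') to run().
if __name__ == '__main__':
    mode = sys.argv[1] if len(sys.argv) > 1 else 'analyzer'
    run(mode)
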
import datetime

from eye.eye import Eye
from analyzer.analyzer import Analyzer
from sender.sender import Sender
from config.config import Config

configuration = Config()
eye = Eye()
analyzer = Analyzer()
sender = Sender()

frame = eye.capture()
while True:
    data = analyzer.analyze(frame)
    sender.send(data)
    frame = eye.capture()

def get_analyzer(holiday_plan_id):
    if HolidayPlan.query.filter_by(id=holiday_plan_id).scalar() is None:
        return Analyzer()
    analyzer_as_json = \
        HolidayPlan.query.filter_by(id=holiday_plan_id).first().analyzer_as_json
    return Analyzer().from_json(analyzer_as_json)

def run_multiple(self):
    # Check the most specific flag combinations first so every branch is reachable
    # Four options
    if self.main_services.settings.generator and self.main_services.settings.trainer \
            and self.main_services.settings.analyzer and self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
        Generator.main(self)
        Trainer.main(self)
        Analyzer.main(self)
    # Three options, generator
    elif self.main_services.settings.generator and self.main_services.settings.trainer \
            and self.main_services.settings.analyzer:
        Generator.main(self)
        Trainer.main(self)
        Analyzer.main(self)
    elif self.main_services.settings.generator and self.main_services.settings.trainer \
            and self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
        Generator.main(self)
        Trainer.main(self)
    # Three options, trainer
    elif self.main_services.settings.trainer and self.main_services.settings.analyzer \
            and self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
        Trainer.main(self)
        Analyzer.main(self)
    # Two options, generator
    elif self.main_services.settings.generator and self.main_services.settings.trainer:
        Generator.main(self)
        Trainer.main(self)
    elif self.main_services.settings.generator and self.main_services.settings.analyzer:
        Generator.main(self)
        Analyzer.main(self)
    elif self.main_services.settings.generator and self.main_services.settings.load_simulator:
        Generator.main(self)
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
    # Two options, trainer
    elif self.main_services.settings.trainer and self.main_services.settings.analyzer:
        Trainer.main(self)
        Analyzer.main(self)
    elif self.main_services.settings.trainer and self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
        Trainer.main(self)
    # Two options, analyzer
    elif self.main_services.settings.analyzer and self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
        Analyzer.main(self)
    # Singles
    elif self.main_services.settings.generator:
        Generator.main(self)
    elif self.main_services.settings.trainer:
        Trainer.main(self)
    elif self.main_services.settings.analyzer:
        Analyzer.main(self)
    elif self.main_services.settings.load_simulator:
        simulator: Simulator = Simulator(self.main_services)
        simulator.start()
    if self.main_services.settings.clear_cache:
        self.main_services.resources.cache_dir.clear()

args = vars(parser.parse_args())

print("\n\n")
print(datetime.datetime.now().strftime('%d %b %G %I:%M%p'))
print("\n")

params = dict()
params['index_by'] = 'titles'
params['enable_filtering'] = False
params['resources'] = "./res/"
params['similarity_threshold'] = .4
params['server_mode'] = False

# parse argument
if args['server']:
    params['server_mode'] = True


def print_configuration(params):
    print("Configuration:")
    print("\tIndex by: %s" % params['index_by'])
    print("\tFiltering enabled: %s" % repr(params['enable_filtering']))
    print("\tResource folder: %s" % params['resources'])
    print("\tSimilarity threshold: %.2f" % params['similarity_threshold'])
    print("\tServer mode enabled: %s" % repr(params['server_mode']))


print_configuration(params)

a = Analyzer(params)
a.begin()

import sys

from parser.parser import Parser
from compiler.compiler import Compiler
from analyzer.analyzer import Analyzer

BUILTINS_FILE = "src/runtime/builtins_signatures"

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Error! Provide source and target files as arguments to this script")
        exit(1)
    code_file = sys.argv[1]
    target_file = sys.argv[2]
    builtins = BUILTINS_FILE
    ast = Parser(code_file, [builtins]).parse()
    if Analyzer().validate(ast):
        Compiler(target_file).compile(ast)

def scan_file(path):
    path = os.path.normpath(path)
    tree = build_ast(path)
    analyzer = Analyzer(path)
    analyzer.visit(tree)
    analyzer.report()

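A hedged sketch of a batch wrapper around `scan_file`, assuming one wants to scan every Python file under a directory tree; the `os.walk` traversal and the `.py` filter are assumptions for illustration, not part of the original.

import os

# Hypothetical helper: run scan_file on every .py file under a root directory.
def scan_directory(root):
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            if name.endswith('.py'):
                scan_file(os.path.join(dirpath, name))
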
class EstimatorServer():
    def __init__(self, gdrive_certificat_path):
        self.server = Flask(__name__, static_url_path='',
                            static_folder='frontend/static')
        self.socketio = SocketIO(self.server)
        self.session_maker = run_orm()
        self.analyzer = Analyzer()
        self.drive = Drive(gdrive_certificat_path)
        self.server.route('/', methods=['GET'])(self.index)
        self.server.route('/upload', methods=['POST'])(self.upload_task)
        self.server.route('/end_check', methods=['POST'])(self.end_check)

    def item_callback(self, current_item_id, count_items):
        self.socketio.emit(
            'changed-report-status',
            json.dumps({
                'status': 'handling',
                'description': f'Обработано ответов из архива {current_item_id} из {count_items}'
            }))

    def load_lecture_file(self, file):
        extension = re.findall(r'\.\w+$', file.filename)[0]
        if extension == '.pptx':
            lecture = Presentation(file)
            return read_from_presentation(lecture)
        if extension == '.txt':
            wrapper = io.TextIOWrapper(file, encoding='utf-8')
            return wrapper.read()
        if extension == '.docx':
            lecture = docx.Document(file)
            fullText = []
            for para in lecture.paragraphs:
                fullText.append(para.text)
            return ' '.join(fullText)
        raise NotSupportLectureExtensionType(extension)

    def load_essays_file(self, file):
        extension = re.findall(r'\.\w+$', file.filename)[0]
        if extension == '.csv':
            return pd.read_csv(file)
        if extension == '.xlsx':
            return pd.read_excel(file)
        if extension == '.zip':
            archive = zipfile.ZipFile(file, 'r')
            return download_archive(self.drive, archive, self.item_callback)
        raise NotSupportEssayExtensionType(extension)

    def index(self):
        return self.server.send_static_file("index.html")

    def upload_task(self):
        try:
            if 'lecture' not in request.files or 'essays' not in request.files:
                return redirect(request.url)
            lecture = request.files['lecture']
            essays = request.files['essays']
            if lecture.filename == '' or essays.filename == '':
                return redirect(request.url)
            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Обработка файлов"
                }))
            lecture = self.load_lecture_file(lecture)
            essays = self.load_essays_file(essays)
            essays = essays.dropna(axis=0)
            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Оценка эссе"
                }))
            report = self.analyzer.analyze(lecture, essays)
            report_schema = ReportSchema()
            session = self.session_maker()
            session.add(report)
            session.commit()
            self.socketio.emit('changed-report-status',
                               json.dumps({'status': 'handled'}))
            return report_schema.dump(report)
        except Exception as e:
            print(e)
            print(traceback.print_exc())
            if type(e) == NotFoundEssayColumn or type(e) == NotSupportEssayExtensionType \
                    or type(e) == NotSupportLectureExtensionType:
                return json.dumps({"status": "error", "text": str(e)}), 500
            else:
                return json.dumps({
                    "status": "error",
                    "text": "Ошибка оценки загруженных эссе"
                }), 500

    def end_check(self):
        try:
            session = self.session_maker()
            report_schema = ReportSchema()
            json_data = json.loads(request.data)
            report = report_schema.load(data=json_data, session=session)
            if report.lecture is None:
                raise Exception("Received incorrect data. Report not found")
            session.commit()
            return json.dumps({
                "status": "success",
                "text": "Результаты проверки эссе успешно сохранены"
            })
        except Exception as e:
            print(e)
            print(traceback.print_exc())
            session.close()
            return json.dumps({
                "status": "error",
                "text": "Ошибка сохранения резултатов проверки"
            }), 500

    def start(self):
        self.socketio.run(self.server, host='localhost', port=5000)

def main():
    parser = argparse.ArgumentParser(description="Gaussian Input Output HMM")
    parser.add_argument('--data', type=str, default='./dataset/ptb/',
                        help='location of the data corpus')
    parser.add_argument('--batch', type=int, default=256)
    parser.add_argument('--optim', choices=['sgd', 'adam'], default='adam')
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lr_decay', type=float, default=0.999995,
                        help='Decay rate of learning rate')
    parser.add_argument('--amsgrad', action='store_true', help='AMS Grad')
    parser.add_argument('--weight_decay', type=float, default=0.001,
                        help='weight for l2 norm decay')
    parser.add_argument('--warmup_steps', type=int, default=0, metavar='N',
                        help='number of steps to warm up (default: 0)')
    parser.add_argument('--var_scale', type=float, default=1.0)
    parser.add_argument('--log_dir', type=str,
                        default='./output/' + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S") + "/")
    parser.add_argument('--dim', type=int, default=10)
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--random_seed', type=int, default=10)
    parser.add_argument('--in_mu_drop', type=float, default=0.0)
    parser.add_argument('--in_cho_drop', type=float, default=0.0)
    parser.add_argument('--t_mu_drop', type=float, default=0.0)
    parser.add_argument('--t_cho_drop', type=float, default=0.0)
    parser.add_argument('--out_mu_drop', type=float, default=0.0)
    parser.add_argument('--out_cho_drop', type=float, default=0.0)
    parser.add_argument('--trans_cho_method', type=str,
                        choices=['random', 'wishart'], default='random')
    parser.add_argument('--input_cho_init', type=float, default=0.0,
                        help='init method of input cholesky matrix. '
                             '0 means random. The other score means constant')
    parser.add_argument('--trans_cho_init', type=float, default=1.0,
                        help='init added scale of random version init_cho_init')
    parser.add_argument('--output_cho_init', type=float, default=0.0,
                        help='init method of output cholesky matrix. '
                             '0 means random. The other score means constant')
    # i_comp_num = 1, t_comp_num = 1, o_comp_num = 1, max_comp = 1,
    parser.add_argument('--input_comp_num', type=int, default=1,
                        help='input mixture gaussian component number')
    parser.add_argument('--tran_comp_num', type=int, default=1,
                        help='transition mixture gaussian component number')
    parser.add_argument('--output_comp_num', type=int, default=1,
                        help='output mixture gaussian component number')
    parser.add_argument('--threshold', type=float, default=1.0,
                        help='pruning hyper-parameter, greater than 1 is max component, '
                             'less than 1 is max value')
    parser.add_argument('--unk_replace', type=float, default=0.0,
                        help='The rate to replace a singleton word with UNK')
    parser.add_argument('--tran_weight', type=float, default=0.0001)
    parser.add_argument('--input_weight', type=float, default=0.0)
    parser.add_argument('--output_weight', type=float, default=0.0)
    parser.add_argument('--emission_cho_grad', type=bool, default=False)
    parser.add_argument('--transition_cho_grad', type=bool, default=True)
    parser.add_argument('--decode_cho_grad', type=bool, default=False)
    parser.add_argument('--gaussian_decode', action='store_true')
    parser.add_argument('--analysis', action='store_true')
    parser.add_argument('--sep_normalize', type=float, default=0.01)
    args = parser.parse_args()

    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)

    log_dir = args.log_dir
    # setting optimizer
    optim = args.optim
    batch_size = args.batch
    # optim = 'sgd'
    lr = args.lr
    lr_decay = args.lr_decay
    warmup_steps = args.warmup_steps
    amsgrad = args.amsgrad
    weight_decay = args.weight_decay
    root = args.data
    in_mu_drop = args.in_mu_drop
    in_cho_drop = args.in_cho_drop
    t_mu_drop = args.t_mu_drop
    t_cho_drop = args.t_cho_drop
    out_mu_drop = args.out_mu_drop
    out_cho_drop = args.out_cho_drop
    tran_cho_method = args.trans_cho_method
    input_cho_init = args.input_cho_init
    trans_cho_init = args.trans_cho_init
    output_cho_init = args.output_cho_init
    input_num_comp = args.input_comp_num
    tran_num_comp = args.tran_comp_num
    output_num_comp = args.output_comp_num
    threshold = args.threshold
    unk_replace = args.unk_replace
    normalize_weight = [args.tran_weight, args.input_weight, args.output_weight]
    gaussian_decode = args.gaussian_decode
    sep_normalize = args.sep_normalize
    analysis = args.analysis
    EMISSION_CHO_GRAD = args.emission_cho_grad
    TRANSITION_CHO_GRAD = args.transition_cho_grad
    DECODE_CHO_GRAD = args.decode_cho_grad

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # save parameter
    save_parameter_to_json(log_dir, vars(args))

    logger = get_logger('Sequence-Labeling')
    change_handler(logger, log_dir)
    # logger = LOGGER
    logger.info(args)

    device = torch.device('cuda') if args.gpu else torch.device('cpu')

    # Loading data
    logger.info('Load PTB data....')
    alphabet_path = os.path.join(root, 'alphabets')
    train_path = os.path.join(root, 'train.conllu')
    dev_path = os.path.join(root, 'dev.conllu')
    test_path = os.path.join(root, 'test.conllu')
    word_alphabet, char_alphabet, pos_alphabet, type_alphabet = conllx_data.create_alphabets(
        alphabet_path, train_path, data_paths=[dev_path, test_path],
        embedd_dict=None, max_vocabulary_size=1e5, min_occurrence=1)
    train_dataset = conllx_data.read_bucketed_data(train_path, word_alphabet,
                                                   char_alphabet, pos_alphabet,
                                                   type_alphabet)
    num_data = sum(train_dataset[1])
    dev_dataset = conllx_data.read_data(dev_path, word_alphabet, char_alphabet,
                                        pos_alphabet, type_alphabet)
    test_dataset = conllx_data.read_data(test_path, word_alphabet, char_alphabet,
                                         pos_alphabet, type_alphabet)
    logger.info("Word Alphabet Size: %d" % word_alphabet.size())
    logger.info("Character Alphabet Size: %d" % char_alphabet.size())
    logger.info("POS Alphabet Size: %d" % pos_alphabet.size())

    ntokens = word_alphabet.size()
    nlabels = pos_alphabet.size()

    # init analyzer
    if analysis:
        analyzer = Analyzer(word_alphabet=word_alphabet, pos_alphabet=pos_alphabet)
    else:
        analyzer = None

    # build model
    if threshold >= 1.0:
        model = MixtureGaussianSequenceLabeling(
            dim=args.dim, ntokens=ntokens, nlabels=nlabels,
            t_cho_method=tran_cho_method, t_cho_init=trans_cho_init,
            in_cho_init=input_cho_init, out_cho_init=output_cho_init,
            in_mu_drop=in_mu_drop, in_cho_drop=in_cho_drop,
            t_mu_drop=t_mu_drop, t_cho_drop=t_cho_drop,
            out_mu_drop=out_mu_drop, out_cho_drop=out_cho_drop,
            i_comp_num=input_num_comp, t_comp_num=tran_num_comp,
            o_comp_num=output_num_comp, max_comp=int(threshold),
            gaussian_decode=gaussian_decode)
    else:
        model = ThresholdPruningMGSL(
            dim=args.dim, ntokens=ntokens, nlabels=nlabels,
            t_cho_method=tran_cho_method, t_cho_init=trans_cho_init,
            in_cho_init=input_cho_init, out_cho_init=output_cho_init,
            in_mu_drop=in_mu_drop, in_cho_drop=in_cho_drop,
            t_mu_drop=t_mu_drop, t_cho_drop=t_cho_drop,
            out_mu_drop=out_mu_drop, out_cho_drop=out_cho_drop,
            i_comp_num=input_num_comp, t_comp_num=tran_num_comp,
            o_comp_num=output_num_comp, threshold=threshold,
            gaussian_decode=gaussian_decode)
    # model = RNNSequenceLabeling("LSTM", ntokens=ntokens, nlabels=nlabels,
    #                             ninp=args.dim, nhid=args.dim, dropout=in_mu_drop)
    # model = WeightIOHMM(vocab_size=ntokens, nlabel=nlabels, num_state=100)
    model.to(device)
    logger.info('Building model ' + model.__class__.__name__ + '...')

    # optimizer = optim.Adam(model.parameters(), lr=lr)
    parameters_need_update = filter(lambda p: p.requires_grad, model.parameters())
    optimizer, scheduler = get_optimizer(parameters_need_update, optim, lr,
                                         amsgrad, weight_decay,
                                         lr_decay=lr_decay,
                                         warmup_steps=warmup_steps)

    # depend on dev ppl
    best_epoch = (-1, 0.0, 0.0)
    num_batches = num_data // batch_size + 1

    def train(best_epoch, thread=6):
        epoch = 0
        patient = 0
        while epoch - best_epoch[0] <= thread:
            epoch_loss = 0
            num_back = 0
            num_words = 0
            num_insts = 0
            model.train()
            for step, data in enumerate(
                    iterate_data(train_dataset, batch_size, bucketed=True,
                                 unk_replace=unk_replace, shuffle=True)):
                # for j in tqdm(range(math.ceil(len(train_dataset) / batch_size))):
                optimizer.zero_grad()
                # samples = train_dataset[j * batch_size: (j + 1) * batch_size]
                words, labels, masks = data['WORD'].to(device), data['POS'].to(device), data['MASK'].to(device)
                loss = 0.0
                if threshold >= 1.0:
                    # sentences, labels, masks, revert_order = standardize_batch(samples)
                    loss = model.get_loss(words, labels, masks,
                                          normalize_weight=normalize_weight,
                                          sep_normalize=sep_normalize)
                else:
                    for i in range(batch_size):
                        loss += model.get_loss(words[i], labels[i], masks[i],
                                               normalize_weight=normalize_weight,
                                               sep_normalize=sep_normalize)
                # loss = model.get_loss(words, labels, masks)
                loss.backward()
                optimizer.step()
                scheduler.step()
                epoch_loss += (loss.item()) * words.size(0)
                num_words += torch.sum(masks).item()
                num_insts += words.size()[0]
                if step % 10 == 0:
                    torch.cuda.empty_cache()
                    sys.stdout.write("\b" * num_back)
                    sys.stdout.write(" " * num_back)
                    sys.stdout.write("\b" * num_back)
                    curr_lr = scheduler.get_lr()[0]
                    log_info = '[%d/%d (%.0f%%) lr=%.6f] loss: %.4f (%.4f)' % (
                        step, num_batches, 100. * step / num_batches, curr_lr,
                        epoch_loss / num_insts, epoch_loss / num_words)
                    sys.stdout.write(log_info)
                    sys.stdout.flush()
                    num_back = len(log_info)
            logger.info('Epoch ' + str(epoch) + ' Loss: ' +
                        str(round(epoch_loss / num_insts, 4)))
            if threshold >= 1.0:
                acc, _ = evaluate(dev_dataset, batch_size, model, device)
            else:
                acc, _ = evaluate(dev_dataset, 1, model, device)
            logger.info('\t Dev Acc: ' + str(round(acc * 100, 3)))
            if analysis:
                analyse(model, dev_dataset, batch_size, device, analyzer,
                        log_dir + '/dev_' + str(epoch), buckted=False)
                analyse(model, test_dataset, batch_size, device, analyzer,
                        log_dir + '/test_' + str(epoch), buckted=False)
            if best_epoch[1] < acc:
                test_acc, _ = evaluate(test_dataset, batch_size, model, device)
                logger.info('\t Test Acc: ' + str(round(test_acc * 100, 3)))
                best_epoch = (epoch, acc, test_acc)
                patient = 0
            else:
                patient += 1
            epoch += 1
            if patient > 4:
                print('reset optimizer momentums')
                scheduler.reset_state()
                patient = 0
        logger.info("Best Epoch: " + str(best_epoch[0]) +
                    " Dev ACC: " + str(round(best_epoch[1] * 100, 3)) +
                    " Test ACC: " + str(round(best_epoch[2] * 100, 3)))
        return best_epoch

    best_epoch = train(best_epoch, thread=10)
    # logger.info("After tunning mu. Here we tunning variance")
    # # flip gradient
    # for parameter in model.parameters():
    #     # flip
    #     parameter.requires_grad = not parameter.requires_grad
    # best_epoch = train(best_epoch)

    with open(log_dir + '/' + 'result.json', 'w') as f:
        final_result = {
            "Epoch": best_epoch[0],
            "Dev": best_epoch[1] * 100,
            "Test": best_epoch[2] * 100
        }
        json.dump(final_result, f)

class ATC:
    section = "AvailableOptions"

    def __init__(self):
        # Initialising fields
        self.parameters = {}
        # Loading config
        self.config = self.loadConfig()
        self.analyzer = Analyzer(self.config)
        global analyzer_global
        analyzer_global = self.analyzer
        # Selecting mode
        if len(sys.argv) > 1:
            self.parse_args()
            self.analyzer.error_occurred.connect(self.print_error)
            filename = self.parameters["input"]
            if not os.path.exists(filename):
                self.print_error("File {} does not exist".format(filename))
                sys.exit()
            try:
                text = self.analyzer.load_file(self.parameters["input"])
                if not self.analyzer.isTextValid(text):
                    self.print_error(
                        "File {} does not contain valid text".format(filename))
                    sys.exit()
            except Exception as e:
                self.print_error("Error loading file {}:\n{}".format(filename, e))
                sys.exit()
            result = self.analyzer.analyze(text, self.parameters)
            if result is None:
                self.print_error("Unknown error occurred")
                sys.exit()
            result.save_to_file(self.parameters["output"],
                                self.parameters["threshold"], n_digits=5)
            sys.exit(0)
        else:
            show_splashscreen()
            self.ui = GUI(analyzer=self.analyzer, config=self.config)

    def parse_args(self):
        description = "Automated Text Classifier for VINITI. Чтобы запустить графический сеанс, " \
                      "запустите программу без аргументов"
        argparser = ArgumentParser(prog="ATC", description=description)
        formats = self.config.get(self.section, "formats").split(", ")
        languages = self.config.get(self.section, "languages").split(", ")
        norm_options = self.config.get(self.section, "norm_predict").split(", ")
        argparser.add_argument("-i", "--input",
                               help="полный путь к файлу с текстом",
                               required=True)  # type=unescaped_str
        argparser.add_argument(
            "-o", "--output",
            help="полный путь к файлу, в который будет записан результат",
            required=True)
        argparser.add_argument("-id", "--rubricator-id",
                               help="идентификатор рубрикатора",
                               required=True)
        argparser.add_argument("-f", "--format",
                               help="формат входного файла",
                               choices=formats, required=False)
        argparser.add_argument("-l", "--language",
                               help="язык входного текста",
                               choices=languages, required=True)
        argparser.add_argument("-t", "--threshold",
                               help="пороговое значение вероятности. " +
                                    "Ответы классификатора с вероятностью ниже " +
                                    "заданной выведены не будут",
                               default=0.0, type=float, required=False)
        argparser.add_argument(
            "-n", "--normalize",
            help="нормировать ли предсказание классификатора",
            choices=norm_options, required=False, default="not")
        subparsers = argparser.add_subparsers(help="Commands")
        # Creating server command
        server_parser = subparsers.add_parser("server",
                                              help="запустить режим сервера")
        server_parser.add_argument("port",
                                   help="номер порта, на котором запустить сервер",
                                   action=LaunchServer, type=int)
        self.parameters = vars(argparser.parse_args())

    @staticmethod
    def print_error(error_msg: str):
        print(error_msg, file=sys.stderr)

    @staticmethod
    def loadConfig():
        parser = ConfigParser()
        parser.read([os.path.join(os.path.dirname(__file__), "config.ini")],
                    encoding="utf-8")
        return parser

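A possible entry point for the `ATC` class above: its constructor already chooses between the CLI pipeline and the GUI based on `sys.argv`, so instantiating it is enough. The `__main__` guard is an assumption, not part of the original snippet.

# Hypothetical entry point: constructing ATC runs either the CLI pipeline
# or the GUI, depending on whether command-line arguments were supplied.
if __name__ == "__main__":
    ATC()
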