def on_mouse(event, x, y, flags, param):
    """
        Function: on_mouse
        ------------------
        callback for clicking the image; has you input the board
        coords of the point you just clicked
        stores results in corner_board_points and corner_image_points
    """
    #=====[ Step 1: only accept button down events ]=====
    if not event == cv2.EVENT_LBUTTONDOWN:
        return

    #=====[ Step 2: get corresponding points ]=====
    print "=====[ Enter board coordinates: ]====="
    # board_x = int(raw_input('>>> x: '))
    # board_y = int(raw_input('>>> y: '))
    # board_point = (board_x, board_y)
    keypoint = get_closest_keypoint((x, y), param)
    image_point = keypoint.pt
    print "Stored as: "
    # print " - board_point: ", board_point
    print " - image_point: ", image_point
    # corner_board_points.append(board_point)
    corner_image_points.append(image_point)
    corner_keypoints.append(keypoint)
    print_message("ESC to exit")
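# --- Hedged usage sketch (not from the source): how a callback like
# on_mouse is typically registered with OpenCV's HighGUI. The window
# name, `image`, and the keypoint list passed through `param` are
# hypothetical stand-ins.
import cv2

cv2.namedWindow('board')
cv2.imshow('board', image)   # image: the board photo being annotated
cv2.setMouseCallback('board', on_mouse, keypoints)  # keypoints arrive as `param`
cv2.waitKey(0)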
def tf_idf():
    util.print_message('Start counting tf-idf...', debug=True)
    if not os.path.exists(settings.TFIDF_FILE_PATH):
        os.mkdir(settings.TFIDF_FILE_PATH)
    c = Calculator()
    file_names = util.get_file_list(settings.WORD_COUNT_FILE_PATH)
    for file_name in file_names:
        util.print_message('Processing tf-idf on {0}', arg=file_name)
        c.tf_idf(file_name, None, None)
def youtube_dl_download(url):
    util.print_message('Downloading mp3 from URL...')
    try:
        with youtube_dl.YoutubeDL(conf.YOUTUBE_DL_OPTS) as ydl:
            ydl.download([url])
    except:
        util.print_message('Failed to download file from {}\n'
                           'Please try again...'.format(url),
                           color='red', exit=True)
def push_to_mongo(db, message):
    try:
        record = db.arch.find_one({'url': message.url})
        if record is None:
            db.arch.insert_one(message.__dict__)
        else:
            util.print_message(
                '{0} already exists in mongo'.format(message.url))
    except:
        e = sys.exc_info()[0]
        util.print_message('Exception happened {0}'.format(e))
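# --- Hedged alternative sketch (not the project's code): the
# find-then-insert above has a race window between the lookup and the
# insert. PyMongo can do the same insert-if-absent atomically with an
# upsert; `db` and `message` are as in push_to_mongo above.
doc = {k: v for k, v in message.__dict__.items() if k != 'url'}
db.arch.update_one(
    {'url': message.url},     # match on the url
    {'$setOnInsert': doc},    # written only when no document matches
    upsert=True)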
def main():
    if len(sys.argv) == 2:
        url = sys.argv[1]
    elif len(sys.argv) > 2:
        # TODO: Reject all and prompt or quit with usage.
        error_warning('WARNING: Only using first command line argument',
                      color='yellow')
    try:
        util.display_intro()
        username = raw_input('Enter your Spotify username: ')
        # [Spotify client setup redacted in the source; `spotted` is
        #  created here from the username]
        if 'url' not in locals():
            url = raw_input('Please enter a valid YouTube/SoundCloud URL: ')
        youtube_dl_download(url)
        result = acoustid_search()
        percentage = float(result[0]) * 100
        match_track = clean_track(str(result[2]))
        match_artist = clean_artist(str(result[3]))
        util.print_message('{:04.2f}% MATCH: {} by {}'
                           .format(percentage, match_track, match_artist),
                           color='cyan')
        track = spotted.search_song(match_track, match_artist)
        full_title = '{0} by {1}'.format(track['name'],
                                         track['artists'][0]['name'])
        while True:
            try:
                prompt = 'Would you like to add {} to {}? (y/n): ' \
                    .format(full_title, spotted.playlist['name'])
                status = raw_input(prompt).lower()[0]
                if status == 'n':
                    util.print_message('Thanks for trying! Come again.',
                                       exit=True)
                    return
                elif status == 'y':
                    spotted.add_song_to_playlist(track)
                    return
                else:
                    util.print_message('Select from either Y or N.',
                                       color='red')
            except TypeError:
                util.print_message('Enter a valid option: Y or N.',
                                   color='red')
    except KeyboardInterrupt:
        util.print_message('\nExiting Spotted on Spotify...', exit=True)
def count_words():
    util.print_message('Start counting words...', debug=True)
    if not os.path.exists(settings.WORD_COUNT_FILE_PATH):
        os.mkdir(settings.WORD_COUNT_FILE_PATH)
    client = MongoClient(settings.MONGO_HOST, 27017)
    db = client[settings.MONGO_DATABASE]
    c = Calculator()
    cursor = db.arch.find()
    for post in cursor:
        c.tf(str(post['_id']) + '.txt', post['url'], post['body'])
def authenticate(username, password, token):
    global print_exceptions, bend_url

    pagename = "Authentication %s"
    caption = "Authentication %s"
    msg = 'authentication %s!'

    try:
        if username:
            bend = SOAPpy.SOAPProxy(bend_url)
            success, ref = bend.authenticate(ip, username, password, token)
            xref = ref
            if token == "0":
                xref = "/cgi-bin/auth.py?a=z&token=0"

            if success:
                flog.info("User %s authentication successful from %s" % (username, ip))
                xmsg = msg % ("successful! You will be now redirected to originally requested site",)
                if token == 0:
                    xmsg = msg % ("successful! You will be redirected to your status page",)
                util.print_message(pagename % ("succeeded",), caption % ("succeeded",),
                                   xmsg, redirect_url=xref, redirect_time=0)
            else:
                flog.info("User %s authentication failed from %s" % (username, ip))
                util.print_message(pagename % ("failed",), caption % ("failed",),
                                   msg % ("failed",), redirect_url=xref, redirect_time=1)
        else:
            util.print_message(" ", " ", "...",
                               redirect_url="/cgi-bin/auth.py?token=%s" % (token,),
                               redirect_time=0)
    except Exception, e:
        flog.error("exception caught: %s" % (str(e),))
        if print_exceptions:
            util.print_message("Whoops!", "Exception caught!",
                               str(e) + " " + traceback.format_exc(100) +
                               " backend URL %s" % (bend_url,))
        else:
            util.print_message("Authentication failed", "Authentication failed",
                               "There was a problem validating your credentials. Please contact the system administrator.")
def get_keywords_map():
    util.print_message('Start counting keywords map...', debug=True)
    keywords_map = {}
    file_names = util.get_file_list(settings.TFIDF_FILE_PATH)
    for file_name in file_names:
        util.print_message('Processing keywords on {0}', arg=file_name)
        tf_idf_dict = util.file2dict(settings.TFIDF_FILE_PATH, file_name)
        for item in sorted(tf_idf_dict.items(),
                           key=operator.itemgetter(1),
                           reverse=True)[:20]:
            if keywords_map.has_key(item[0]):
                keywords_map[item[0]] += 1
            else:
                keywords_map[item[0]] = 1
    util.save_sorted_dict(settings.DATA_PATH, settings.KEYWORD_MAP,
                          keywords_map)
def test_keywords():
    client = MongoClient(settings.MONGO_HOST, settings.MONGO_PORT)
    db = client[settings.MONGO_DATABASE]
    cursor = db.arch.find().limit(10)
    for post in cursor:
        id = post['_id']
        title = post['title']
        tf_idf_dict = util.file2dict(
            settings.TFIDF_FILE_PATH, str(id) + '.txt')
        util.print_message(title.encode('gbk'))
        for item in sorted(tf_idf_dict.items(),
                           key=operator.itemgetter(1),
                           reverse=True)[:10]:
            util.print_message('{0}:{1}'.format(
                item[0].decode('utf8').encode('gbk'), item[1]))
def respond(self, response):
    msg = "Sending complete job to the dashboard with response {}".format(response)
    print_message(msg, 'ok')
    request = json.dumps({
        'job_id': self.options.get('job_id'),
        'request': 'complete',
        'output': response
    })
    url = 'http://' + FRONTEND_POLLER_HOST
    try:
        r = requests.post(url, request)
    except Exception as e:
        raise e
    return
def update_keywords():
    util.print_message('Start updating keywords...', debug=True)
    client = MongoClient(settings.MONGO_HOST, settings.MONGO_PORT)
    db = client[settings.MONGO_DATABASE]
    cursor = db.arch.find()
    for post in cursor:
        id = post['_id']
        tf_idf_dict = util.file2dict(settings.TFIDF_FILE_PATH,
                                     str(id) + '.txt')
        tags = []
        for item in sorted(tf_idf_dict.items(),
                           key=operator.itemgetter(1),
                           reverse=True)[:20]:
            tags.append(item[0])
        util.print_message(' '.join(tags))
        db.arch.update_one({'_id': id}, {'$set': {'tags': tags}})
def sanitize_input(self, options):
    validated_options = {}
    print_message(options, 'ok')
    expected_params = [
        'server',
        'username',
        'password',
        'path',
        'job_id'
    ]
    for key in options:
        if key in expected_params:
            validated_options[key] = options[key]
        else:
            print_message('Unexpected option {}'.format(key))
    return validated_options
def count_all_words():
    util.print_message('Start counting all words...', debug=True)
    all_word_count = {}
    file_names = util.get_file_list(settings.WORD_COUNT_FILE_PATH)
    for file_name in file_names:
        util.print_message('Processing all word count on {0}', arg=file_name)
        word_count_dict = util.file2dict(settings.WORD_COUNT_FILE_PATH,
                                         file_name, True)
        for key, value in word_count_dict.iteritems():
            if all_word_count.has_key(key):
                all_word_count[key] = all_word_count[key] + value
            else:
                all_word_count[key] = value
    f = open(settings.WORD_COUNT_TOTAL, 'w+')
    for item in sorted(all_word_count.items(),
                       key=operator.itemgetter(1), reverse=True):
        f.write('{0}:{1}\n'.format(item[0], item[1]))
    f.close()
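# --- Hedged sketch (assumption, Python 3): the per-file merge in
# count_all_words is the classic dict-accumulation pattern; the same
# merge with collections.Counter handles the missing-key case implicitly.
from collections import Counter

all_word_count = Counter()
for file_name in util.get_file_list(settings.WORD_COUNT_FILE_PATH):
    word_count_dict = util.file2dict(settings.WORD_COUNT_FILE_PATH,
                                     file_name, True)
    all_word_count.update(word_count_dict)  # adds counts rather than replacing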
def onpress(event):
    """
        Function: press
        ---------------
        callback for user pressing keys;
        user enters esc -> this quits
    """
    #=====[ Step 1: verify key ]=====
    if not event.key == 'escape':
        return

    #=====[ Step 2: save BoardImage ]=====
    image_name = 'board_image.bi'
    board_image.save(image_name)
    print_message("BoardImage saved to " + image_name)
    exit()
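# --- Hedged usage sketch (not from the source): `event.key` suggests a
# matplotlib key-press event; this is how such a handler is typically
# connected. The figure and `image` here are hypothetical.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.imshow(image)                                    # the board image being shown
fig.canvas.mpl_connect('key_press_event', onpress)  # route key presses to onpress
plt.show()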
def handle(self):
    server = self.options.get('server', DIAG_VIEWER_HSOT)
    client = DiagnosticsViewerClient(
        server=server,
        cert=False)
    try:
        id, key = client.login(
            self.options['username'],
            self.options['password'])
    except Exception as e:
        print_debug(e)
        return -1
    path = self.options.get('path')
    print_message('Uploading directory {}'.format(path))
    try:
        dataset_id = client.upload_package(path)
    except Exception as e:
        print_debug(e)
        return -1
    return json.dumps({'dataset_id': dataset_id})
def handle(self):
    args = ' '.join(self.call_args)
    msg = "Starting job: {}".format(args)
    print_message(msg, 'ok')
    process = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
    output = process.communicate()
    archive_path = '{prefix}{user}/run_archives/{run_name}_{id}/'.format(
        prefix=USER_DATA_PREFIX,
        user=self.config.get('user', 'default'),
        id=self.config.get('job_id'),
        run_name=self.config.get('run_name', 'default'))
    archive_filename = archive_path + 'output_archive'
    if not os.path.exists(archive_filename + '.tar.gz'):
        try:
            print_message(
                'creating output archive {}'.format(archive_filename + '.tar.gz'),
                'ok')
            shutil.make_archive(
                archive_filename,
                'gztar',
                self.config.get('output_dir'))
        except:
            raise
    else:
        print_message('archive {} already exists'.format(
            archive_filename + '.tar.gz'))
    return output
def recover_privkey_helper(pubkey, keysize, keyparams):
    print('[*] [RSA-%d] Finding the prime number p...' % keysize)
    n_bytes = util.to_bytes(pubkey.n)
    kp_keysize = keysize // 2
    kp_pubkey = RSA.importKey(keyparams.get_pubkey(kp_keysize))
    kp_r2_bytes = base64.b64decode(keyparams.get_r2(kp_keysize))
    kp_r2 = util.to_number(kp_r2_bytes)
    kp_privkey = RSA.importKey(keyparams.get_privkey(kp_keysize))
    encrypted_p_xor_r1_xor_r2 = n_bytes[0:len(kp_r2_bytes)]
    original_kp_r1_bytes = base64.b64decode(keyparams.get_r1(kp_keysize))
    for i in range(0, 0xffffff):
        kp_r2_bytes = util.to_bytes(kp_r2)
        encrypted_p_xor_r1 = bytes_xor(encrypted_p_xor_r1_xor_r2, kp_r2_bytes)
        p_xor_r1_bytes = util.rsa_decrypt(kp_privkey, bytes(encrypted_p_xor_r1))
        kp_r1 = util.to_number(original_kp_r1_bytes)
        for j in range(0, 0xa):
            for k in range(0, 0xa):
                kp_r1_bytes = util.to_bytes(kp_r1)
                p_bytes = bytes_xor(p_xor_r1_bytes, kp_r1_bytes)
                p = util.to_number(p_bytes)
                util.print_message('[RSA-%d] [%d:%d:%d] %d'
                                   % (keysize, i, j, k, p))
                if sympy.isprime(p) and sympy.isprime((p - 1) // 2):
                    privkey = util.rsa_construct_private_key(p, pubkey)
                    if privkey:
                        print('\n[+] [RSA-%d] p = %d' % (keysize, p))
                        print('[+] [RSA-%d] Private key is recovered' % keysize)
                        return privkey
                kp_r1 += 1
            kp_r1 = permute_r_key(kp_r1, keysize)
        kp_r2 += 1
    print('\n[-] [RSA-%d] Cannot recover the private key' % keysize)
    return None
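# --- Hedged sketch (assumption): bytes_xor is called above but not shown;
# a minimal reconstruction, assuming equal-length byte strings.
def bytes_xor(a, b):
    # XOR two byte strings element-wise; zip truncates to the shorter input.
    return bytes(x ^ y for x, y in zip(a, b))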
def idf():
    word_doc_freq = {}
    util.print_message('Start counting idf...', debug=True)
    tag_dict = util.load_dictionary(settings.DATA_PATH, settings.USER_DICT)
    file_names = util.get_file_list(settings.WORD_COUNT_FILE_PATH)
    for file_name in file_names:
        util.print_message('Processing all word count on {0}', arg=file_name)
        word_count_dict = util.file2dict(settings.WORD_COUNT_FILE_PATH,
                                         file_name)
        for key in word_count_dict.iterkeys():
            if not tag_dict.has_key(key):
                continue
            if word_doc_freq.has_key(key):
                word_doc_freq[key] = word_doc_freq[key] + 1
            else:
                word_doc_freq[key] = 1
    util.save_sorted_dict(settings.DATA_PATH, settings.WDF_FILE,
                          word_doc_freq)
    doc_number = len(file_names)
    inverse_doc_freq = {k: math.log(float(doc_number) / (1 + v))
                        for k, v in word_doc_freq.items()}
    util.save_sorted_dict(settings.DATA_PATH, settings.IDF_FILE,
                          inverse_doc_freq)
    return inverse_doc_freq
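# --- Worked check (values hypothetical): the dict comprehension in idf()
# computes the smoothed inverse document frequency
#     idf(t) = ln(N / (1 + df(t)))
# where N is the document count and df(t) is the number of documents
# containing term t.
import math

doc_number = 100               # N
word_doc_freq = {'kernel': 9}  # df('kernel') = 9
idf_kernel = math.log(float(doc_number) / (1 + word_doc_freq['kernel']))
print(round(idf_kernel, 3))    # ln(100 / 10) = ln(10) ~= 2.303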
def get_first_valid_encoding(results):
    for result in results:
        if all(_ is not None for _ in result):
            result = list(result)
            try:
                result[2] = result[2].replace(u'\u2018', u'\'') \
                                     .replace(u'\u2019', u'\'') \
                                     .replace(u'\u201C', u'\"') \
                                     .replace(u'\u201D', u'\"')
                result[2].encode('ascii', 'ignore')
                result[3] = result[3].replace(u'\u2018', u'\'') \
                                     .replace(u'\u2019', u'\'') \
                                     .replace(u'\u201C', u'\"') \
                                     .replace(u'\u201D', u'\"')
                result[3].encode('ascii', 'ignore')
                return result
            except UnicodeEncodeError:
                continue
    util.print_message(
        'ERROR: There were no matches for your track with valid '
        'ascii encodings. Could not search for your track on '
        'Spotify.',
        color='red', exit=True)
def finishup(config, job_sets, state_path, event_list, status,
             display_event, thread_list, kill_event):
    message = 'Performing post run cleanup'
    event_list.push(message=message)
    if config.get('global').get('no_cleanup', False):
        print 'Not cleaning up temp directories'
    else:
        tmp = os.path.join(config['global']['output_path'], 'tmp')
        if os.path.exists(tmp):
            rmtree(tmp)

    message = 'All processing complete' if status == 1 else 'One or more jobs failed'
    emailaddr = config.get('global').get('email')
    if emailaddr:
        event_list.push(
            message='Sending notification email to {}'.format(emailaddr))
        try:
            if status == 1:
                msg = 'Post processing for {exp} has completed successfully\n'.format(
                    exp=config['global']['experiment'])
            else:
                msg = 'One or more job(s) for {exp} failed\n\n'.format(
                    exp=config['global']['experiment'])
            for job_set in job_sets:
                msg += '\nYearSet {start}-{end}: {status}\n'.format(
                    start=job_set.set_start_year,
                    end=job_set.set_end_year,
                    status=job_set.status)
                for job in job_set.jobs:
                    if job.status == JobStatus.COMPLETED:
                        if job.config.get('host_url'):
                            msg += ' > {job} - COMPLETED :: output hosted :: {url}\n'.format(
                                url=job.config['host_url'], job=job.type)
                        else:
                            msg += ' > {job} - COMPLETED :: output located :: {output}\n'.format(
                                output=job.output_path, job=job.type)
                    elif job.status in [JobStatus.FAILED, JobStatus.CANCELLED]:
                        output_path = os.path.join(
                            job.config['run_scripts_path'],
                            '{job}_{start:04d}_{end:04d}.out'.format(
                                job=job.type,
                                start=job.start_year,
                                end=job.end_year))
                        msg += ' > {job} - {status} :: console output :: {output}\n'.format(
                            output=output_path, job=job.type, status=job.status)
                    else:
                        msg += ' > {job} - {state}\n'.format(
                            job=job.type, state=job.status)
                msg += '\n\n'
            m = Mailer(src='*****@*****.**', dst=emailaddr)
            m.send(status=message, msg=msg)
        except Exception as e:
            print_debug(e)

    event_list.push(message=message)
    display_event.set()
    print_type = 'ok' if status == 1 else 'error'
    print_message(message, print_type)
    logging.info("All processes complete")
    for t in thread_list:
        kill_event.set()
        t.join(timeout=1.0)
    time.sleep(2)
def poll():
    params = {'request': 'next'}
    url = 'http://' + FRONTEND_POLLER_HOST
    options = {}
    try:
        job = requests.get(url, params).content
        job = json.loads(job)
        print_message(job, 'ok')
    except ConnectionError as ce:
        print_message("Error requesting job from frontend poller")
        print_debug(ce)
        return -3, None
    if not job:
        return -2, None
    try:
        options['user'] = job.get('user')
        options['run_name'] = job.get('run_name')
        options['job_id'] = job.get('job_id')
        if not job.get('diag_type'):
            options['diag_type'] = 'amwg'
        print_message('job options: {}'.format(options), 'ok')
    except Exception as e:
        print_debug(e)
        return -1, options['job_id']
    run_type = job.get('run_type')
    if not run_type:
        print_message("No run type in job request")
        return -1, None
    if run_type == 'diagnostic':
        try:
            sets = json.loads(job.get('diag_set'))
        except Exception as e:
            print_message('Unable to unpack diag_set')
            sets = '5'
        options['set'] = sets
        options['model_path'] = job.get('model_path')
        options['obs_path'] = job.get('obs_path')
        options['output_dir'] = job.get('output_dir')
        print_message('Got a new job with parameters:\n{}'.format(options), 'ok')
        handler = StartDiagHandler(options)
    elif run_type == 'model':
        handler = StartModelHandler(options)
    elif run_type == 'update':
        handler = UpdateJobHandler(options)
    elif run_type == 'upload_to_viewer':
        options['server'] = job.get('request_attr').get('server')
        options['username'] = job.get('request_attr').get('username')
        options['password'] = job.get('request_attr').get('password')
        options['path'] = job.get('request_attr').get('path')
        handler = UploadOutputHandler(options)
    else:
        print_message("Unrecognized request: {}".format(run_type))
        return -1, None
    try:
        response = handler.handle()
    except Exception as e:
        print_message("Error in job handler with options \n {}".format(options))
        print_debug(e)
        return -1, None
    try:
        print_message('Sending message to frontend poller: {}'.format(response))
        handler.respond(response)
    except Exception as e:
        print_message("Error sending response to job \n {}".format(options))
        print_debug(e)
        return -1, None
    return 0, None
if __name__ == "__main__":
    while True:
        retval, id = poll()
        if retval == 0:
            continue
        elif retval == -2:
            print_message('No new jobs', 'ok')
            time.sleep(5)
            continue
        elif retval == -3:
            time.sleep(5)
            continue
        if retval:
            print_message('Job run error')
            # send error message to frontend poller
            request = json.dumps({
                'job_id': id,
                'request': 'error',
            })
            url = 'http://' + FRONTEND_POLLER_HOST
            try:
                r = requests.post(url, request)
            except Exception as e:
                print_debug(e)
        # [head of this block truncated in the source; the preceding
        #  statement ends with:  % (ip, ref, token)) ]
        print auth_page % (style, tok_str,
                           str(port) + "-" + tenant_name + "-" + str(tenant_index))
    else:
        if ip:
            bend = SOAPpy.SOAPProxy(bend_url)
            logon_info = bend.whois(ip)
            flog.debug("logon_info: " + str(logon_info))
            if logon_info != []:
                if logoff == "0":
                    if status > 0:
                        print logged_page_small % (style_small, logon_info[1])
                    else:
                        print logged_page % (style, logon_info[1])
                else:
                    bend.deauthenticate(ip)
                    print auth_page % (style, "0",
                                       str(port) + "-" + tenant_name + "-" + str(tenant_index))
            else:
                print auth_page % (style, "0",
                                   str(port) + "-" + tenant_name + "-" + str(tenant_index))
except Exception, e:
    util.print_message("Error", "Error occurred:", str(e), "/error.html")
    flog.error("auth.py: exception caught: " + str(e))
def acoustid_search():
    util.print_message('Analyzing audio fingerprint...')
    try:
        search = list(
            acoustid.match(conf.ACOUSTID_API_KEY,
                           conf.YOUTUBE_DL_OPTS['outtmpl']))
    except acoustid.NoBackendError:
        util.print_message('ERROR: Chromaprint library/tool not found.',
                           color='red', exit=True)
    except acoustid.FingerprintGenerationError:
        util.print_message('ERROR: Audio fingerprint could not be calculated.',
                           color='red', exit=True)
    except acoustid.WebServiceError as exc:
        util.print_message('ERROR: Web service request failed: {}.'
                           .format(exc.message), color='red', exit=True)
    except Exception as ecx:
        util.print_message('ERROR: {}'.format(ecx.args[1]),
                           color='red', exit=True)
    if len(search) == 0:
        util.print_message(
            'Failed to find a match for your track in the '
            'MusicBrainz database.',
            color='red', exit=True)
    return get_first_valid_encoding(sorted(search, reverse=True))
def __init__(self, options=None):
    self.options = self.sanitize_input(options)
    print_message(self.options)
def sanitize_input(self):
    args = ['metadiags']
    path_prefix = "path=" + USER_DATA_PREFIX
    for x in self.config:
        print_message('key: {}\nval: {}'.format(x, self.config.get(x)))
        option_key = ''
        option_val = ''
        if x == 'diag_type':
            option_key = '--package'
            # Check for valid package
            if self.config.get(x) != 'AMWG' and self.config.get(x) != 'amwg':
                print_message("{} is not a valid package".format(self.config.get(x)))
                return -1
            option_val = self.config.get(x)
        elif x == 'model_path':
            option_key = '--model'
            # Check for valid paths
            if os.path.exists(self.config.get(x)):
                option_val = 'path=' + self.config.get(x) + ',climos=yes'
            else:
                print_message('model_path {} does not exist'.format(self.config.get(x)))
        elif x == 'obs_path':
            option_key = '--obs'
            # Check for valid obs path
            if os.path.exists(self.config.get(x)):
                option_val = 'path=' + self.config.get(x) + ',climos=yes'
            else:
                print_message('obs_path {} does not exist'.format(self.config.get(x)))
        elif x == 'output_dir':
            option_key = '--outputdir'
            if not os.path.exists(self.config.get(x)):
                print_message('output_dir {} does not exist'.format(self.config.get(x)))
            else:
                option_val = self.config.get(x)
                print_message(option_val)
        elif x == 'set':
            option_key = '--set'
            sets = []
            # Check for valid sets
            for s in self.config.get(x):
                if s not in self.allowed_sets:
                    print_message('invalid set: {}'.format(s))
                else:
                    sets.append(s)
            option_val = ' '.join(sets)
        #
        # etc etc etc moar options
        #
        else:
            print "Unrecognized option passed to diag handler: {}".format(x)
            continue
        args.append(option_key)
        args.append(option_val)
    return args
def initialize(argv, **kwargs):
    """
    Parse the commandline arguments, and setup the master config dict

    Parameters:
        argv (list): a list of arguments
        event_list (EventList): The main list of events
        kill_event (threading.Event): An event used to kill all running threads
        __version__ (str): the current version number for processflow
        __branch__ (str): the branch this version was built from
    """
    # Setup the parser
    pargs = parse_args(argv=argv)
    if pargs.version:
        msg = 'Processflow version {}'.format(kwargs['version'])
        print msg
        sys.exit(0)
    if not pargs.config:
        parse_args(print_help=True)
        return False, False, False
    event_list = kwargs['event_list']
    event = kwargs['kill_event']
    print_line(line='Entering setup', event_list=event_list)

    # check if globus config is valid, else remove it
    globus_config = os.path.join(os.path.expanduser('~'), '.globus.cfg')
    if os.path.exists(globus_config):
        try:
            conf = ConfigObj(globus_config)
        except:
            os.remove(globus_config)

    if not os.path.exists(pargs.config):
        print "Invalid config, {} does not exist".format(pargs.config)
        return False, False, False

    # Check that there are no white space errors in the config file
    line_index = check_config_white_space(pargs.config)
    if line_index != 0:
        print '''
ERROR: line {num} does not have a space after the '=', white space is required.
Please add a space and run again.'''.format(num=line_index)
        return False, False, False

    # read the config file and setup the config dict
    try:
        config = ConfigObj(pargs.config)
    except Exception as e:
        print_debug(e)
        print "Error parsing config file {}".format(pargs.config)
        parse_args(print_help=True)
        return False, False, False

    # run validator for config file
    messages = verify_config(config)
    if messages:
        for message in messages:
            print_message(message)
        return False, False, False

    try:
        setup_directories(pargs, config)
    except Exception as e:
        print_message('Failed to setup directories')
        print_debug(e)
        sys.exit(1)

    if pargs.resource_path:
        config['global']['resource_path'] = os.path.abspath(
            pargs.resource_path)
    else:
        config['global']['resource_path'] = os.path.join(
            sys.prefix, 'share', 'processflow', 'resources')

    # Setup boolean config flags
    config['global']['host'] = True if config.get('img_hosting') else False
    config['global']['always_copy'] = True if pargs.always_copy else False
    config['global']['dryrun'] = True if pargs.dryrun else False
    config['global']['debug'] = True if pargs.debug else False
    config['global']['verify'] = True if pargs.verify else False
    config['global']['max_jobs'] = pargs.max_jobs if pargs.max_jobs else False

    # setup logging
    if pargs.log:
        log_path = pargs.log
    else:
        log_path = os.path.join(
            config['global']['project_path'],
            'output', 'processflow.log')
    print_line(line='Log saved to {}'.format(log_path), event_list=event_list)
    if not kwargs.get('testing'):
        from imp import reload
        reload(logging)
    config['global']['log_path'] = log_path
    if os.path.exists(log_path):
        logbak = log_path + '.bak'
        if os.path.exists(logbak):
            os.remove(logbak)
        copyfile(log_path, log_path + '.bak')
    log_level = logging.DEBUG if pargs.debug else logging.INFO
    logging.basicConfig(
        format='%(asctime)s:%(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        filename=log_path,
        filemode='w',
        level=log_level)
    logging.getLogger('globus_sdk').setLevel(logging.ERROR)
    logging.getLogger('globus_cli').setLevel(logging.ERROR)

    logging.info("Running with config:")
    msg = json.dumps(config, sort_keys=False, indent=4)
    logging.info(msg)

    if pargs.max_jobs:
        print_line(
            line="running with maximum {} jobs".format(pargs.max_jobs),
            event_list=event_list)
    if not config['global']['host'] or not config.get('img_hosting'):
        print_line(line='Not hosting img output', event_list=event_list)

    msg = 'processflow version {} branch {}'.format(
        kwargs['version'],
        kwargs['branch'])
    logging.info(msg)

    # Copy the config into the input directory for safe keeping
    input_config_path = os.path.join(
        config['global']['project_path'], 'input', 'run.cfg')
    try:
        copy(pargs.config, input_config_path)
    except:
        pass

    if config['global']['always_copy']:
        msg = 'Running in forced-copy mode, previously hosted diagnostic output will be replaced'
    else:
        msg = 'Running without forced-copy, previous hosted output will be preserved'
    print_line(line=msg, event_list=event_list)

    # initialize the filemanager
    db = os.path.join(
        config['global'].get('project_path'),
        'output', 'processflow.db')
    msg = 'Initializing file manager'
    print_line(msg, event_list)
    filemanager = FileManager(
        database=db,
        event_list=event_list,
        config=config)

    filemanager.populate_file_list()
    msg = 'Starting local status update'
    print_line(msg, event_list)

    filemanager.update_local_status()
    msg = 'Local status update complete'
    print_line(msg, event_list)

    msg = filemanager.report_files_local()
    print_line(msg, event_list)

    filemanager.write_database()
    all_data = filemanager.all_data_local()
    if all_data:
        msg = 'all data is local'
    else:
        msg = 'Additional data needed'
    print_line(msg, event_list)

    logging.info("FileManager setup complete")
    logging.info(str(filemanager))

    if all_data:
        print_line(
            line="skipping globus setup",
            event_list=event_list)
    else:
        if config['global'].get('local_globus_uuid'):
            endpoints = [endpoint for endpoint in filemanager.get_endpoints()]
            local_endpoint = config['global'].get('local_globus_uuid')
            if local_endpoint:
                endpoints.append(local_endpoint)
            msg = 'Checking authentication for {} endpoints'.format(endpoints)
            print_line(line=msg, event_list=event_list)
            setup_success = setup_globus(
                endpoints=endpoints,
                event_list=event_list)
            if not setup_success:
                print "Globus setup error"
                return False, False, False
            else:
                print_line(
                    line='Globus authentication complete',
                    event_list=event_list)

    # setup the runmanager
    runmanager = RunManager(
        event_list=event_list,
        event=event,
        config=config,
        filemanager=filemanager)
    runmanager.setup_cases()
    runmanager.setup_jobs()
    runmanager.write_job_sets(
        os.path.join(config['global']['project_path'],
                     'output', 'state.txt'))
    return config, filemanager, runmanager
def main():
    opts = get_opts()

    # Paths and device
    current_device = opts.device
    train_data_path = opts.data_dir
    pretrained_path = opts.pretrain_model_path
    model_path = opts.out_dir

    # training settings
    pretrained = opts.pretrained_option
    num_epochs = opts.num_epochs
    learning_rate = opts.learning_rate
    num_query = opts.num_query
    num_passage = opts.num_passage
    active_learning = opts.active_learning_stage

    # network settings
    network_type = opts.network_type
    embed_size = opts.embed_size
    num_hidden_nodes = opts.num_hidden_nodes
    num_hidden_layers = opts.num_hidden_layers
    dropout_rate = opts.dropout_rate

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    torch.manual_seed(318)

    if pretrained == "Yes":
        checkpoint = torch.load(pretrained_path)
        network_type = checkpoint['network_type']
        embed_size = checkpoint['embed_size']
        num_hidden_nodes = checkpoint['num_hidden_nodes']
        num_hidden_layers = checkpoint['num_hidden_layers']
        dropout_rate = checkpoint['dropout_rate']
        if network_type == "append":
            net = AppendNet(embed_size=embed_size,
                            num_hidden_nodes=num_hidden_nodes,
                            num_hidden_layers=num_hidden_layers,
                            dropout_rate=dropout_rate)
        if network_type == "residual":
            net = ResidualNet(embed_size=embed_size,
                              num_hidden_nodes=num_hidden_nodes,
                              num_hidden_layers=num_hidden_layers,
                              dropout_rate=dropout_rate)
        net.load_state_dict(checkpoint['model'])
        net.to(current_device)
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        if network_type == "append":
            net = AppendNet(embed_size=embed_size,
                            num_hidden_nodes=num_hidden_nodes,
                            num_hidden_layers=num_hidden_layers,
                            dropout_rate=dropout_rate).to(current_device)
        if network_type == "residual":
            net = ResidualNet(embed_size=embed_size,
                              num_hidden_nodes=num_hidden_nodes,
                              num_hidden_layers=num_hidden_layers,
                              dropout_rate=dropout_rate).to(current_device)
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    print("Loading data")
    train_pos_dict, train_neg_dict, query_dict, passage_dict = load(
        train_data_path)
    print("Data successfully loaded.")
    print("Negative Pair dict size: " + str(len(train_neg_dict)))
    print("Positive Pair dict size: " + str(len(train_pos_dict)))
    print("Num of queries: " + str(len(query_dict)))
    print("Num of passages: " + str(len(passage_dict)))
    print("Finish loading.")

    arg_str = active_learning + "_" + network_type + "_" + str(num_query) \
        + "_" + "query" + "_" + str(num_passage) + "_" + "passage"
    unique_path = model_path + arg_str + ".model"
    output_path = model_path + arg_str + ".csv"

    print("Total number of parameters: {}".format(net.parameter_count()))

    for ep_idx in range(num_epochs):
        train_loss = train(net, optimizer, opts, train_pos_dict,
                           train_neg_dict, query_dict, passage_dict)
        print_message([ep_idx, train_loss])
        with open(output_path, mode='a+') as output:
            output_writer = csv.writer(output)
            output_writer.writerow([ep_idx, train_loss])
        torch.save(
            {
                "model": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "n_epoch": ep_idx,
                "train_loss": train_loss,
                "network_type": network_type,
                "embed_size": embed_size,
                "num_hidden_nodes": num_hidden_nodes,
                "num_hidden_layers": num_hidden_layers,
                "dropout_rate": dropout_rate,
                "num_passage": num_passage,
                "num_query": num_query
            }, unique_path)
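# --- Hedged sketch (assumption): parameter_count() is not a built-in
# torch.nn.Module method, so AppendNet/ResidualNet presumably define a
# helper along these lines.
def parameter_count(self):
    # total number of trainable parameters in the network
    return sum(p.numel() for p in self.parameters() if p.requires_grad)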
if __name__ == '__main__':
    calculator = Calculator()
    consumer = KafkaConsumer(settings.KAFKA_TOPIC,
                             bootstrap_servers=settings.KAFKA_SERVERS)
    db = connect_mongo()
    for m in consumer:
        p = util.json2obj(m.value)
        name = util.get_md5_hash(p.url)
        util.print_message(p.url)
        d = calculator.tf_idf(name + '.txt', p.url, p.body)
        tags = pick_tags(d)
        keywords = getattr(p, 'keywords', '')
        excerpt = getattr(p, 'excerpt', '')
        post = Post(p.url, p.title, p.body, keywords, excerpt, tags)
        push_to_mongo(db, post)