def sample_reduction(positive_samples, negative_samples, ratio: float):
    """Reduce the number of training samples in the dataset to match the given ratio.

    Parameters:
        positive_samples (DataFrame): The positive training samples
        negative_samples (DataFrame): The negative training samples
        ratio (float): Ratio of the positive and negative samples for the training,
            e.g. 0.1 -> 10% positive samples
    """
    # apply the lower boundary to the fraction, ensure the fraction is still
    # in range 0 - 1
    total_count = len(positive_samples.index) + len(negative_samples.index)
    fraction_positive = total_count * ratio / len(positive_samples.index)
    fraction_negative = total_count * (1 - ratio) / len(negative_samples.index)

    # positive limits
    if fraction_positive > 1.0:
        fraction_negative = len(positive_samples.index) * \
            ((1 - ratio) / ratio) / len(negative_samples.index)
        fraction_positive = 1.0
    # negative limits
    elif fraction_negative > 1.0:
        fraction_positive = len(negative_samples.index) * \
            (ratio / (1 - ratio)) / len(positive_samples.index)
        fraction_negative = 1.0

    positive_samples = positive_samples.sample(frac=fraction_positive)
    negative_samples = negative_samples.sample(frac=fraction_negative)
    log(
        f"Reduced the number of samples to "
        f"{len(positive_samples.index)}/{len(negative_samples.index)} "
        f"({ratio}/{1 - ratio})",
        False)
    return positive_samples, negative_samples
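# Hypothetical usage sketch (not part of the original code): assumes two pandas
# DataFrames, df_pos and df_neg, holding the positive and negative samples.
# Keeping roughly 10% positives:
#
#   df_pos_reduced, df_neg_reduced = sample_reduction(df_pos, df_neg, ratio=0.1)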
def _set_up_architecture_middle_inception(self):
    """Set up all variable architecture elements between the static input and output
    by using the inception net.

    :return: The fully-connected bottleneck layer (not yet merged with any old
        bottleneck values).
    """
    log.log("hidden architecture: inception")
    return inception_builder.build(self._X)
def __init__(self, images: np.ndarray, labels: np.ndarray, bottlenecks=None):
    """Create a new DataBundleAdvanced.

    :param images: See parent class.
    :param labels: See parent class.
    :param bottlenecks: See parent class.
    """
    # inform the user about potential memory improvements
    # (these refer to the casting done right after this check)
    if images.dtype != cf.get("img_dtype"):
        log.log("WARNING: copying image array, because it has the wrong dtype: {}".format(
            images.dtype))
    if labels.dtype != cf.get("label_dtype"):
        log.log("WARNING: copying label array, because it has the wrong dtype: {}".format(
            labels.dtype))

    # if the given parameters do not have the correct data type yet, we will convert them now
    images = np.asarray(images, dtype=cf.get("img_dtype"))
    labels = np.asarray(labels, dtype=cf.get("label_dtype"))
    # TODO why aren't we doing this for the bottlenecks? may they ever have varied?

    # now redirect the (maybe modified) parameters to the actual constructor of the parent class
    super(DataBundleAdvanced, self).__init__(images, labels, bottlenecks)

    # calculate and store the total number of foreground samples
    # (assuming that there are only two classes and the foreground class is described by "1")
    self._n_positive_samples = self.labels.sum()
def log_results(results, first_line="results:"):
    """Log all values given in results."""
    log.log(first_line)
    for key, value in results.items():
        # format and log the value
        value_format = criteria.get(key).format(value)
        log.log(" - {}: {}".format(key, value_format))
def scrape(all=False, **kwargs):
    if all:
        sources = ['http://www.europarl.europa.eu/meps/en/directory/xml?letter=&leg=']
    else:
        sources = ['http://www.europarl.europa.eu/meps/en/incoming-outgoing/incoming/xml',
                   'http://www.europarl.europa.eu/meps/en/incoming-outgoing/outgoing/xml',
                   'http://www.europarl.europa.eu/meps/en/full-list/xml']
    payload = {}
    if 'onfinished' in kwargs:
        payload['onfinished'] = kwargs['onfinished']
    if all:
        actives = {e['UserID'] for e in db.meps_by_activity(True)}
        inactives = {e['UserID'] for e in db.meps_by_activity(False)}
        meps = actives | inactives
        for unlisted in [1018, 26833, 1040, 1002, 2046, 23286, 28384, 1866, 28386,
                         1275, 2187, 34004, 28309, 1490, 28169, 28289, 28841, 1566,
                         2174, 4281, 28147, 28302]:
            meps.discard(unlisted)
            payload['id'] = unlisted
            add_job('mep', dict(payload))
    for src in sources:
        root = fetch(src, prune_xml=True)
        for id in root.xpath("//mep/id/text()"):
            if all:
                meps.discard(int(id))
            payload['id'] = int(id)
            add_job('mep', dict(payload))
    if all:
        log(3, "mepids not in unlisted nor in directory {!r}".format(meps))
        for id in meps:
            payload['id'] = id
            add_job('mep', dict(payload))
def handle_read(self):
    data = self.recv(8192)
    #print(data)
    if not data:
        return
    try:
        data = loads(data)
    except:
        self.notify('Invalid json\n')
        return
    if 'command' not in data:
        self.notify('Missing "command" attribute', type='error')
        return

    if data['command'] in ['l', 'ls', 'list']:
        self.notify('scraper queue list', **get_all_jobs())

    if data['command'] in ['c', 'call']:
        if data.get('scraper') not in self.scrapers:
            self.notify('Missing or invalid scraper {}'.format(data.get('scraper')))
            return
        payload = data.get('payload', {})
        add_job(data['scraper'], payload)

    if data['command'] in ['log', 'setlog', 'setlogfile']:
        set_logfile(data.get('path'))
        log(3, 'Changing logfile to {0}'.format(data.get('path')))

    log(3, '# Command `{0}` processed'.format(data['command']))
def get_engine(url):
    ATTEMPTS = 5
    DELAY = 3

    def fail(message):
        log(message, color=log_colors.FAIL)
        sys.exit(1)

    if url is None:
        fail('No database url. Check your environment.')
    try:
        engine = create_engine(url)
    except Exception:
        fail('Invalid database url. Check your environment.')
    else:
        attempt = 0
        while True:
            try:
                with engine.connect():
                    pass
            except Exception:
                if attempt < ATTEMPTS:
                    log(f'Could not connect to DB, retrying in {DELAY}')
                    time.sleep(DELAY)
                    attempt += 1
                    continue
                fail('Cannot connect to database.')
            else:
                return engine
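# Hypothetical usage sketch (not part of the original code): the connection URL
# is assumed to come from the environment, e.g. a DATABASE_URL variable.
#
#   import os
#   engine = get_engine(os.environ.get('DATABASE_URL'))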
def __init__(self):
    log('Gst init..')
    Gst.init()
    self.player = Gst.ElementFactory.make('playbin', 'player')
    fakesink = Gst.ElementFactory.make('fakesink', 'fakesink')
    self.player.set_property('video_sink', fakesink)

    bus = self.player.get_bus()
    bus.add_signal_watch()
    bus.connect('message::error', self.on_error)
    bus.connect('message::eos', self.on_eos)
    bus.connect('message::state-changed', self.on_state_change)

    self.stateListeners = []
    self.queueListeners = []
    self.PLAYING = False
    self.LOADED = False  # can resume?
    self.state = None
    self.songqueue = []
    self.shuffleBackupQueue = []
    self.cursor = -1
    self.MODE_REPEAT = MusicService.MODE_REPEAT_OFF
    self.MODE_SHUFFLE = MusicService.MODE_SHUFFLE_OFF
def load_scrapers():
    scrapers = {}
    for scraper in os.listdir('scrapers/'):
        if scraper.startswith('_') or not scraper.endswith('.py'):
            continue
        try:
            name = scraper[:-3]
            import_path = 'scrapers.' + name
            if import_path in sys.modules:
                del sys.modules[import_path]
            s = load_source(import_path, 'scrapers/' + scraper)
        except:
            log(1, "failed to load scraper %s" % scraper)
            traceback.print_exc()
            continue
        s._queue = Queue()
        scrapers[name] = s
        s._name = name
        if hasattr(s, 'CONFIG'):
            cfg = CONFIG.copy()
            cfg.update(s.CONFIG)
            s.CONFIG = cfg
        else:
            s.CONFIG = CONFIG.copy()
        s.add_job = add_job
        s.get_all_jobs = get_all_jobs
        s._lock = RLock()
        s._job_count = 0
        if s.CONFIG['abort_on_error']:
            s._error_queue = [False for _ in range(ERROR_WINDOW)]
        Thread(target=run_scraper, args=(s, ), name=s._name).start()
        log(3, 'scraper %s added' % scraper)
    return scrapers
def load(path):
    log.log("load ", path)
    with open(path, 'r', encoding='utf-8') as f:
        s = f.read()
    log.log('load: s', s)
    if s:
        return json.loads(s)
def loadandplay(self, song):
    self.stop()
    log('load and play...')
    self.player.set_property('uri', 'file://' + song[LOCATION])
    self.player.set_state(Gst.State.PLAYING)
    self.LOADED = True
    self.PLAYING = True
def check_host(self):
    if self._target.lower().startswith("http://"):
        self.https = False
        self._target = domain = self._target.split("//")[-1].split("/")[0]
        log("Detected HTTP url, using HTTP ATK", "success")
        return True
    elif self._target.lower().startswith("https://"):
        self.https = True
        self._target = domain = self._target.split("//")[-1].split("/")[0]
        log("Detected HTTPS url, using HTTPS ATK", "success")
        return True
    else:
        if socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex(
                (self._target, 80)) == 0:
            log("Target {} has port 80 open, using HTTP ATK".format(self._target),
                "success")
            self.https = False
            return True
        else:
            if socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex(
                    (self._target, 443)) == 0:
                log("Target {} has port 443 open, using HTTPS ATK".format(self._target),
                    "success")
                self.https = True
                return True
            else:
                log("Target www ports are closed (80 & 443), abort.", "error")
                return False
def loop(self):
    epoch = int(time.time())
    # iterate over copies, since both lists are mutated inside the loops
    for exc in list(self._pending):
        if epoch >= exc.epoch_start():
            # submit the bound method directly to avoid late binding of the loop variable
            self._running += [(exc, self._executor.submit(exc.execute))]
            self._pending.remove(exc)
            log("Execution {} started".format(exc.id()), "success")
    for r in list(self._running):
        if epoch >= r[0].epoch_stop():
            r[0].kill()
            time.sleep(1)
            if r[1].done():
                log("Execution {} finished".format(r[0].id()), "success")
                self._running.remove(r)
def create_input_placeholder(self):
    """Extend the default image input by optional augmentation operations.

    self._X will be set in the parent class.
    :return:
    """
    if cf.get("data_augmentation_online"):
        log.log("Extending the input placeholders with augmentation operations")

        # this can replace the default input node (self._X) with additional
        # augmentation operations done in TensorFlow
        self._X_augmentation_input = tf.placeholder(
            name=cf.get("graph_input_training_layer_name") + "_augmented",
            shape=self._shape_data_batch,
            dtype=tf.float32)
        self._Y_augmentation_input = tf.placeholder(
            name='Y_augmented',
            shape=self._shape_labels_batch,
            dtype=tf.int32)
        self._X_augmented, self._Y_augmented = data_augmentation_online.add_augmentation_operations(
            self._X_augmentation_input, self._Y_augmentation_input)

        # network input
        # (dtype=tf.uint8 is not allowed)
        _X = tf.placeholder_with_default(name=cf.get("graph_input_training_layer_name"),
                                         shape=self._shape_data_batch,
                                         input=self._X_augmented)
        _Y = tf.placeholder_with_default(name="Y",
                                         shape=self._shape_labels_batch,
                                         input=self._Y_augmented)
        return _X, _Y
    else:
        return super().create_input_placeholder()
def _update_best_val_results(self, res_val, step):
    """Check whether res_val contains better results than the best seen so far and remember the answer.

    :param res_val: Validation results as returned by self._full_evaluation(self._ds.valid)
    :param step: The iteration number in which res_val were obtained.
    :return:
    """
    if self.best_val_results is None \
            or res_val[self._main_criteria] > self.best_val_results[self._main_criteria]:
        # snapshots
        log.log("Saving snapshot..")
        snapshot_path_prefix = os.path.join(self._snapshot_dir_session,
                                            "val_{}_{:.3f}".format(
                                                self._main_criteria,
                                                res_val[self._main_criteria]))
        self.best_snapshot_path = self._saver.save(self._session, snapshot_path_prefix,
                                                   global_step=step)
        self.best_val_results = res_val
        self.iterations_since_best_found = 0

        # log the new high score
        best_val_txt = criteria.get(self._main_criteria).format(
            self.best_val_results[self._main_criteria])
        log.log("Updated best model with validation {} of {}".format(
            self._main_criteria, best_val_txt))
def parseAndSave(content, currentWebsite):
    # save_html_content(currentWebsite.id, websiteContents)
    soup = BeautifulSoup(content, 'lxml')
    items = soup.find_all('a')
    print("A Items", len(items))
    COUNT = 0
    if items:
        for a in items:
            if a.string:
                url, text = a.get('href'), a.string.encode('utf-8').strip()
                check_pass = check_content(url, text)
                if check_pass:
                    url = complement_url(url, currentWebsite.url)
                    if url:
                        result = save_info_feed(url, text, currentWebsite.id,
                                                currentWebsite.company.id)
                        if result:
                            COUNT += 1
    if COUNT == 0:
        log(NOTICE, "#{id} {name} {site} fetched no updates ({count} items)".format(
            id=currentWebsite.company.id,
            name=currentWebsite.company.name_cn.encode('utf-8').strip(),
            site=currentWebsite.url,
            count=COUNT))
    else:
        log(RECORD, "#{id} {name} {site} fetched {count} new items".format(
            id=currentWebsite.company.id,
            name=currentWebsite.company.name_cn.encode('utf-8').strip(),
            site=currentWebsite.url,
            count=COUNT))
def journal(activity, detail):
    what = {'t': time.time(),
            'a': str(activity),
            'd': detail,
            }
    journaldb.insert(what)
    log("Journal updated: ", activity)
def __on_run_done(self, work_result):
    """Handles user feedback on success or failure of select function."""
    self.run_button.setEnabled(True)
    self.parent.remove_worker_by_jid(work_result.job_id)
    result = work_result.result
    if not result:
        err_log('Error running select..')
        return
    try:
        if 'all_tables' in result:
            all_tables = result['all_tables']
            self.parent.populate_table_lists(all_tables)
        else:
            text = 'Error running select: \'' + str(result) + '\''
            err_log(text)
        # if self.table:
        #     self.table.close_table()
        #     self.table.close()
        #     QWidget().setLayout(self.table_container.layout())
        #
        # self.table = HapiTableView(self, new_table_name)
        # layout = QtWidgets.QGridLayout(self.table_container)
        # layout.addWidget(self.table)
        # self.table_container.setLayout(layout)

        log('Select successfully ran.')
    except Exception as e:
        err_log('Error running select.')
        debug(e)
def get_instance_from_address(self, filepath_abi, contract_address):
    """Returns a contract instance object from address

    Does a (crude) check that the deployment at that address is not empty.
    Creates a contract instance for use with all the 'Contract' methods
    specified in web3.py

    Returns:
        self.contract_instance(class ContractInterface): see above
    """
    self.contract_address = contract_address

    with open(filepath_abi, 'r') as fd:
        contract_abi = json.load(fd)

    try:
        contract_bytecode_length = len(
            self.web3.eth.getCode(self.contract_address).hex())
    except web3.exceptions.InvalidAddress as e:
        log("Contract address is invalid: {}".format(e), "error", errcode=-127)

    try:
        assert (contract_bytecode_length > 4), f"Contract not deployed at {self.contract_address}."
    except AssertionError as e:
        print(e)
        raise
    #else:
    #    print(f"Contract deployed at {self.contract_address}. This function returns an instance object.")

    self.contract_instance = self.web3.eth.contract(
        abi=contract_abi[0], address=self.contract_address)
    return self.contract_instance
def stop(self):
    """Stop the previously-started runtime evaluation."""
    self._end_time = time()
    self._elapsed_seconds = self._end_time - self._start_time
    log.log("TimeWatcher Stop {}: {}".format(
        self._name, self.seconds_to_str(self._elapsed_seconds)))
def visualize_train_valid(train_split, val_split, train_predictions, val_predictions):
    """Static helper method to save an image of a CollageEvaluation containing
    training and validation data.

    :param train_split:
    :param val_split:
    :param train_predictions:
    :param val_predictions:
    :return:
    """
    # create the collage
    collage = CollageEvaluation(
        splits={
            SPLIT_KEY_TRAIN: train_split,
            SPLIT_KEY_VAL: val_split,
            # SPLIT_KEY_TEST: self._ds.test,  # usually we don't need this here
        },
        predictions={
            SPLIT_KEY_TRAIN: train_predictions,
            SPLIT_KEY_VAL: val_predictions,
            # SPLIT_KEY_TEST: self.predict(self._ds.test),  # usually we don't need this here
        })

    # save the image file
    collage_key = "split_evaluation"
    collage_file_path = collage.save_img_file(collage_key)
    log.log("Saved image collage to visualize evaluation to {}".format(
        collage_file_path))
def start(self):
    """Starts counting the time."""
    self._start_time = time()
    self._end_time = None
    self._elapsed_seconds = None
    log.log("TimeWatcher Start: {}".format(self._name))
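# Hypothetical usage sketch (not part of the original code): pairing start() and
# stop() around a block to be timed; the TimeWatcher(name) constructor shown here
# is an assumption based on the self._name attribute used above.
#
#   watcher = TimeWatcher("training")
#   watcher.start()
#   train_model()
#   watcher.stop()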
def _ignore_file(self, dataset_key: str, ignore_key: str):
    """Add the given file to the ignore list.

    Note, this will not yet remove any already loaded files from e.g. self._image_infos.
    """
    # if the ignore list hasn't been initialized yet, load existing info from files first
    if self._ignore_dicts is None:
        self._read_ignore_dicts()

    if ignore_key not in self._ignore_dicts[dataset_key]:
        log.log("Ignoring {} of dataset {}.".format(ignore_key, dataset_key))

        # add file to the internal ignore list
        if dataset_key not in self._ignore_dicts:
            self._ignore_dicts[dataset_key] = dict()
        self._ignore_dicts[dataset_key][ignore_key] = True

        # persisting: add one line per image to the ignore list file
        ignore_file_path = self._get_ignore_file_path(dataset_key)
        with open(ignore_file_path, 'a') as file:
            file.write(ignore_key + "\n")

        # TODO check whether this file is used in the currently loaded image_infos
    else:
        log.log("Already ignored: {} of dataset {}.".format(ignore_key, dataset_key))
def _unignore_file(self, dataset_key: str, ignore_key: str):
    """Remove the given file from the ignore list.

    Note, this will not yet re-add any previously removed files to e.g. self._image_infos.
    """
    # if the ignore list hasn't been initialized yet, load existing info from files first
    if self._ignore_dicts is None:
        self._read_ignore_dicts()

    log.log("Unignoring {} of dataset {}.".format(ignore_key, dataset_key))

    # remove file from the internal ignore list
    if dataset_key in self._ignore_dicts and ignore_key in self._ignore_dicts[dataset_key]:
        del self._ignore_dicts[dataset_key][ignore_key]

    # persisting: remove referencing file lines
    # each line is one ignored element
    ignore_file_path = self._get_ignore_file_path(dataset_key)
    file_str = ""
    lines = file_handler.read_txt_lines(ignore_file_path)
    for line in lines:
        if line != ignore_key:
            file_str += line + "\n"
    with open(ignore_file_path, "w") as file:
        file.write(file_str)
def votemeta(line, date):
    log(3, 'vote title is "%s"' % line)
    res = {'rapporteur': []}

    m = docre.search(line)
    if m:
        doc = m.group(1).replace(' ', '')
        log(4, 'setting doc to "%s"' % doc)
        res['doc'] = doc
        reports = db.get("dossiers_by_doc", doc)
        if reports:
            res['epref'] = [report['procedure']['reference'] for report in reports]
            if len(reports) > 1:
                log(3, "more than 1 dossier referencing document %s, %s" %
                    (doc, [d['procedure']['reference'] for d in reports]))
        else:
            if doc in VOTE_DOX_RE:
                res['epref'] = [VOTE_DOX_RE[doc]]
            elif doc not in ignoredox:
                log(2, '%s despite matching regex could not associate dossier with vote in "%s"' %
                    (doc, line))
        return res

    m = refre.search(line)
    if m and db.get('ep_dossiers', m.group(1)):
        res['epref'] = [m.group(1)]
        return res

    for k, v in VOTE_DOX.items():
        if k in line:
            res['epref'] = [v]
            return res

    log(4, 'no associated dossier for: "%s"' % line)
    return res
def handle_noargs(self, **options):
    try:
        log('Debug', 'Running FUNF decryption script')
        decrypt()
    except Exception as e:
        log('Error', 'Exception thrown from FUNF decryption script: ' + str(e))
def _build_production_model(model_def, best_params, x, y):
    log("Production model build started at %s\n" % now())
    super_model = model_def.model(best_params)
    super_model.fit(x, y)
    return super_model
def _run_all_models(self, refactoring, refactoring_name, dataset, features, scaler, x, y,
                    x_train, x_tests, y_train, y_tests, test_names):
    """For each model, it:
    1) Performs the hyperparameter search
    2) Performs k-fold cross-validation
    3) Persists evaluation results and the best model
    """
    for model in self._models_to_run:
        model_name = model.name()
        if TEST:
            model_name += " test"
        try:
            log("\nBuilding Model {}".format(model.name()))
            self._start_time()
            test_scores, model_to_save = self._run_single_model(
                model, x, y, x_train, x_tests, y_train, y_tests)

            # log test scores
            log(format_results_single_run(dataset, refactoring_name, test_names, model_name,
                                          test_scores["precision"], test_scores["recall"],
                                          test_scores['accuracy'], test_scores['tn'],
                                          test_scores['fp'], test_scores['fn'],
                                          test_scores['tp'], model_to_save, features))

            # we save the best estimator we had during the search
            model.persist(dataset, refactoring_name, features, model_to_save, scaler)
            self._finish_time(dataset, model, refactoring)
        except Exception as e:
            log("An error occurred while working on refactoring " + refactoring_name +
                " model " + model.name() + " with datasets: " + str(test_names))
            log(str(e))
            log(str(traceback.format_exc()))
def mark_question_as_resolved(self, question_id):
    try:
        self.cur.execute(
            "UPDATE question SET has_answer=TRUE WHERE question_id=%s",
            (question_id, ))
    except:
        log("question resolution db update failed")
def _extract_windows(self, img: ImageInfo, convert_raw_to_np=True):
    """Extract all sliding windows from the given img.

    Essentially, this is a wrapper for Window.extract_windows(img) to allow additional
    steps required by subclasses. Exceptions will be caught and replaced by an empty
    list along with an error message, because we don't want the complete inference
    process to get stopped because of single images.
    """
    try:
        windows_raw, windows_info = Window.extract_windows(img, convert_raw_to_np)
        if len(windows_raw) < 1:
            raise ValueError("Could not extract any windows from the given image")
        return windows_raw, windows_info
    except FileNotFoundError:
        log.log(" .. Skipped {}, because the file could not be found".format(
            img.path_resized))
        return [], []
    except:
        log.log(" .. Skipped {}, because of an unexpected error:\n{}".format(
            img.path_resized, traceback.format_exc()))
        return [], []
def albumart(songtitle):
    try:
        return ImageQt.ImageQt(
            cropsquare(Image.open(artname(songtitle) + '.jpg')))
    except:
        log(traceback.format_exc())
        return ImageQt.ImageQt(cropsquare(Image.open('img/example3.jpg')))
def load_file(filename):
    #pdb.set_trace()
    log.log('Debug', 'Trying to populate db with ' + filename)
    mConnector = ConnectorFunf.objects.all()[0]
    db = database.Database()
    anonymizerObject = Anonymizer()
    documents_to_insert = defaultdict(list)

    proc_dir = os.path.join(mConnector.decrypted_path, 'processing')
    if not os.path.exists(proc_dir):
        os.makedirs(proc_dir)

    decrypted_filepath = os.path.join(mConnector.decrypted_path, filename)
    processing_filepath = os.path.join(proc_dir, filename)
    current_filepath = decrypted_filepath

    if os.path.exists(decrypted_filepath) and not os.path.exists(processing_filepath):
        try:
            # move to processing
            shutil.move(decrypted_filepath, proc_dir)
            current_filepath = processing_filepath

            # open connection to db file
            conn = sqlite3.connect(processing_filepath)
            cursor = conn.cursor()

            # get the meta data from db file
            meta = {}
            (meta['device'], meta['uuid'], meta['device_id'], meta['sensible_token'],
             meta['device_bt_mac']) = \
                cursor.execute('select device, uuid, device_id, sensible_token, device_bt_mac from file_info').fetchone()
            meta['device_id'] = anonymizerObject.anonymizeValue('device_id', meta['device_id'])
            #pdb.set_trace()

            # get the user associated with the token
            #meta['user'] = authorization_manager.getAuthorizationForToken(
            #    'connector_funf.submit_data', meta['token']).user
            meta['user'] = '******'

            for row in cursor.execute('select * from data'):
                doc = row_to_doc(row, meta['user'], anonymizerObject)
                if doc == None:
                    continue
                documents_to_insert[doc['probe']].append(dict(doc.items() + meta.items()))
            cursor.close()
            #pdb.set_trace()

            for probe in documents_to_insert:
                db.insert(documents_to_insert[probe], probe)
            os.remove(current_filepath)
        except Exception as e:
            log.log('Error', str(e))
            if 'already exists' not in str(e):
                top = traceback.extract_stack()[-1]
                fail.fail(current_filepath, load_failed_path,
                          'Exception with file: ' + filename + '\n' +
                          ', '.join([type(e).__name__, os.path.basename(top[0]), str(top[1])]))
            else:
                pass
    return False
def write_xml_lmf(*args, **kwds):
    # An XML LMF file contains one lexical resource, itself containing lexicon(s)
    wrapper_rw(lmf_write, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in args[0].get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
    log("Successfully wrote %s LMF entries into XML LMF file '%s'." % (entries_nb, args[1]))
def write_tex(*args, **kwds):
    # A LaTeX file contains one or several lexicons and information about the lexical resource
    wrapper_rw(tex_write, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in args[0].get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
    log("Successfully wrote %s LMF entries into LaTeX file '%s'." % (entries_nb, args[1]))
def config(self, request):
    log.log('Debug', 'GET for config')
    access_token = request.REQUEST.get('access_token', '')
    authorization = self.pipe.getAuthorization(access_token)
    config = self.readConfig(authorization['user'])
    if config:
        return HttpResponse(config)
    else:
        return HttpResponse(status='500')
def write_odt(*args, **kwds):
    # Import only when needed because it requires installation of Python package 'odf'
    from output.odt import odt_write
    # A document file contains one or several lexicons and information about the lexical resource
    wrapper_rw(odt_write, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in args[0].get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
    log("Successfully wrote %s LMF entries into document file '%s'." % (entries_nb, args[1]))
def delete_book(book_id):
    log("Deleting book with _id:", book_id)
    book = {"_id": ObjectId(book_id)}
    original = booksdb.find_one(book)
    result = booksdb.remove(book)
    journal('DELETE', {'in': None, 'out': original})
    return jsonify({'result': result})
def handle(self, *args, **options):
    if len(args) < 1:
        log('Error', 'You have to give me the filename')
        return
    #log('Debug', 'Will try to decrypt ' + args[0])
    try:
        decrypt_file_from_upload(args[0])
    except Exception as e:
        log.log('Error', 'Exception while single decrypting file ' + args[0] + ': ' + str(e))
def config(request):
    pdb.set_trace()
    log.log('Debug', 'GET for config')
    access_token = request.REQUEST.get('access_token', '')
    #authorization = self.pipe.getAuthorization(access_token)
    #config = self.readConfig(authorization['user'])
    config = readConfig('dummy')
    if config:
        return HttpResponse(config)
    else:
        return HttpResponse(status='500')
def main():
    old_version = None
    cfg_manager = config.get_cfg_manager()
    LOG.log("try to read config file: ./release.conf")
    cfg = cfg_manager.parse_config_file('./release.conf')
    LOG.log("config: %s" % cfg)
    source = source_manager.SourceManager(cfg['source'])
    build = build_manager.BuildManager(cfg['build'])
    collect = collect_manager.CollectManager(cfg['collect'])
    monitor = server.Server(cfg['monitor'])
    monitor.status()
    monitor.start(source, build, collect)
def add_all(cls, articles):
    if not articles:
        return
    session = DBSession()
    session.add_all(articles)
    try:
        session.flush()
        session.commit()
    except SQLAlchemyError:
        session.rollback()
        log.log(message=articles[0].url)
        # raise(SQLAlchemyError)
    finally:
        session.close()
def read_xml_lmf(*args, **kwds):
    # To access options
    from pylmflib import options
    global options
    # An XML LMF file contains one lexical resource, itself containing lexicon(s)
    lexical_resource = wrapper_rw(lmf_read, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in lexical_resource.get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
        if options.cross_references:
            # Verify lexicon coherence
            lexicon.check_cross_references()
    log("Successfully created %s LMF entries from XML LMF file '%s'." % (entries_nb, args[0]))
    return lexical_resource
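# Hypothetical usage sketch (not part of the original code): the file name
# "dictionary.xml" is an assumption; write_tex() is the LaTeX writer shown above,
# which takes the lexical resource and an output path.
#
#   lexical_resource = read_xml_lmf("dictionary.xml")
#   write_tex(lexical_resource, "dictionary.tex")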
def upload(request):
    log.log('Debug', 'Received POST')
    scope = 'all_probes'
    access_token = request.REQUEST.get('access_token', '')
    if request.META['CONTENT_TYPE'].split(';')[0] == 'multipart/form-data':
        try:
            uploaded_file = request.FILES['uploadedfile']
            if uploaded_file:
                #try:
                #authorization = authorization_manager.getAuthorizationForToken(scope, access_token)
                mConnector = ConnectorFunf.objects.all()[0]
                #if ('error' in authorization) or (authorization == None):
                #    upload_path = mConnector.upload_not_authorized_path
                #else:
                #    upload_path = mConnector.upload_path
                upload_path = mConnector.upload_path
                backup_path = mConnector.backup_path
                if not os.path.exists(upload_path):
                    os.makedirs(upload_path)
                if not os.path.exists(backup_path):
                    os.makedirs(backup_path)

                filename = uploaded_file.name.split('.')[0].split('_')[0] + '_' + \
                    access_token + '_' + str(int(time.time())) + '.db'
                filepath = os.path.join(upload_path, filename)
                while os.path.exists(filepath):
                    parts = filename.split('.db')
                    counted_parts = re.split('__', parts[0])
                    counter = -1
                    if len(counted_parts) > 1:
                        counter = int(counted_parts[1])
                    filename = counted_parts[0] + '__' + str(counter + 1) + '.db'
                    filepath = os.path.join(upload_path, filename)

                write_file(filepath, uploaded_file)
                shutil.copy(filepath, os.path.join(backup_path, filename))

                # run decryption in the background
                log.log('Debug', settings.ROOT_DIR + './manage.py ' + filename)
                p = Popen([settings.ROOT_DIR + './manage.py', 'funf_single_decrypt', filename],
                          stdout=PIPE, stderr=PIPE)
                #except Exception as e:
                #    log.log('Error', 'Could not write: ' + str(e))
                #    return HttpResponse(status='500')
                #else:
                return HttpResponse(json.dumps({'ok': 'success'}))
            else:
                log.log('Error', 'failed to read')
        except KeyError as e:
            log.log('Error', 'Key error: ' + str(e))
            pass
    # bad request
    return HttpResponse(status='500')
def request(self, url, headers=headers, cookies={}):
    delay = 60
    while True:
        try:
            r = requests.get(url, headers=headers, timeout=10)
        except:
            print "network error, sleeping", delay, "seconds"
            log.log(message=u"network error " + url)
            time.sleep(delay)
            delay += 60
            continue
        if self.is_valid(r):
            return self.convert_encode(r)
        elif r.status_code == 503:
            log.log(message=u"503 error " + url)
            time.sleep(60)
            continue
        return u""
def search(self):
    "Search torrents and update all hits corresponding to the current episode"
    self.update_airdates()
    airdate = self.get_airdate(self.hits.current)
    if not airdate:
        log.verbose("%s (%s) doesn't have an air date" % (self.name, self.hits.current))
        # Try to determine if there is still an episode which airs after
        episode, airdate = self.get_episodes_after(self.hits.current)
        if episode:
            self.hits.current = episode
    if airdate and airdate >= date.today():
        log.verbose("%s (%s) will air on %s" % (self.name, self.hits.current, airdate))
    else:
        torrents = self.search_by_episode(self.hits.current)
        log.log(0 if len(torrents) else 1,
                "%d torrent(s) found for '%s' (%s)" % (len(torrents), self.name, self.hits.current))
        if torrents:
            torrents = [t.merge(self.hits.torrent(self.hits.current, t)) for t in torrents]
            self.hits.torrents(self.hits.current, torrents)
            self.hits.save()
def read_mdf(*args, **kwds):
    import wrapper
    # To access options
    from pylmflib import options
    global options
    # Find lexicon configuration if any
    try:
        id = kwds['id']
    except KeyError:
        id = None
    if id is not None and wrapper.lexical_resource is not None:
        lexicon = wrapper.lexical_resource.get_lexicon(id)
        # Add lexicon argument
        kwds.update({'lexicon': lexicon})
    # An MDF file contains one lexicon only, but wrapper_rw() function encapsulates it
    # into a lexical resource
    lexical_resource = wrapper_rw(mdf_read, *args, **kwds)
    for lexicon in lexical_resource.lexicon:
        if options.cross_references:
            # Verify lexicon coherence
            lexicon.check_cross_references()
        log("Successfully created %s LMF entries from MDF file '%s'." %
            (lexicon.count_lexical_entries(), lexicon.get_entrySource()))
    return lexical_resource
def decrypt_file(directory_to_decrypt, f):
    #pdb.set_trace()
    proc_dir = os.path.join(directory_to_decrypt, 'processing')
    if not os.path.exists(proc_dir):
        os.makedirs(proc_dir)

    upload_filename = os.path.join(directory_to_decrypt, f)
    proc_filename = os.path.join(proc_dir, f)
    decrypted_filename = os.path.join(mConnector.decrypted_path, f)
    curr_filename = upload_filename  # for keeping track of the file's current location
    decryption_success = False
    try:
        # check if still exists, might have been moved in another thread
        if os.path.exists(upload_filename) and not os.path.exists(proc_filename):
            # move it to processing
            shutil.move(upload_filename, proc_dir)
            curr_filename = proc_filename
            # decrypt
            if decrypt_if_not_db_file(proc_filename, key, extension=None):
                decryption_success = True
                fail.safe_move(proc_filename, mConnector.decrypted_path)
                log.log('Debug', 'Still here #1')
                curr_filename = decrypted_filename
                orig_filename = proc_filename + '.orig'
                if os.path.exists(orig_filename):
                    os.remove(orig_filename)
                #log.log('Debug','Still here #2')
                database_single_population.load_file(f)
                return True
            else:
                return False
    except Exception as e:
        # find out when it happened
        action = ''
        if curr_filename == upload_filename:
            action = 'moving to /processing'
        elif curr_filename == proc_filename and decryption_success == False:
            action = 'decrypting'
        elif curr_filename == proc_filename and decryption_success == True:
            action = 'moving to /decrypted'
        elif curr_filename == decrypted_filename:
            action = 'removing the .orig file of'
        try:
            if 'already exists' not in str(e):
                fail.fail(curr_filename, mConnector.decryption_failed_path,
                          'Exception thrown: ' + str(e) + '. While ' + action + ' file: ' + f)
                log.log('error', 'README ^^^^^^^^^^^^^')
            else:
                log.log('error', 'Exception thrown: ' + str(e) + '. While ' + action + ' file: ' + f)
        except Exception as e1:
            pass
    return False
def upload(self, request):
    log.log('Debug', 'Received POST')
    scope = 'all_probes'
    access_token = request.REQUEST.get('access_token', '')
    if request.META['CONTENT_TYPE'].split(';')[0] == 'multipart/form-data':
        # if not request.META['CONTENT_TYPE']=='multipart/form-data;boundary=*****':
        try:
            uploaded_file = request.FILES['uploadedfile']
            if uploaded_file:
                try:
                    #authorization = self.pipe.getAuthorization(access_token, scope=scope)
                    authorization = ''
                    if 'error' in authorization:
                        upload_path = service_config.CONNECTORS["connector_funf"]["config"]["upload_not_authorized_path"]
                    else:
                        upload_path = service_config.CONNECTORS["connector_funf"]["config"]["upload_path"]
                    if not os.path.exists(upload_path):
                        os.mkdir(upload_path)
                    filepath = os.path.join(upload_path,
                                            uploaded_file.name.split('.')[0].split('_')[0] + '_' +
                                            access_token + '_' + str(int(time.time())) + '.db')
                    self.write_file(filepath, uploaded_file)
                except Exception as e:
                    log.log('Error', 'Could not write: ' + str(e))
                    return HttpResponse(status='500')
                else:
                    return HttpResponse(json.dumps({'ok': 'success'}))
            else:
                log.log('Error', 'failed to read')
        except KeyError as e:
            log.log('Error', 'Key error: ' + str(e))
            pass
    # bad request
    return HttpResponse(status='500')
def test_log(self):
    import os
    ## Test with options
    self.options.log_filename = "test/log.txt"
    msg = "These are options."
    log(msg, self.options)
    # Test log file
    expected_line = "These are options." + EOL
    log_file = open_read(self.options.log_filename)
    self.assertEqual(log_file.readline(), expected_line)
    log_file.close()
    ## Test without options
    msg = "This is a message."
    log(msg)
    # Test log file
    expected_lines = ["These are options." + EOL, "This is a message." + EOL]
    log_file = open_read(self.options.log_filename)
    self.assertListEqual(log_file.readlines(), expected_lines)
    ## Test verbose mode (need to reset log filename)
    self.options.verbose = True
    log_filename = self.options.log_filename
    self.options.log_filename = None
    log(msg, self.options)
    # Test that log file remains unchanged
    log_file = open_read(log_filename)
    self.assertListEqual(log_file.readlines(), expected_lines)
    ## Test unwritable file
    self.options.log_filename = "/usr/log.txt"
    test = False
    try:
        log(msg, self.options)
    except Error:
        test = True
    self.assertTrue(test)
    ## Remove log file
    os.remove(log_filename)
def read_sort_order(*args, **kwds):
    sort_order = wrapper_rw(order_read, *args, **kwds)
    log("Successfully read sort order: " + str(sort_order))
    return sort_order
def update_book(book_id):
    book = {"_id": ObjectId(book_id)}
    log("Updating book with _id:", book_id)
    original = booksdb.find_one(book)
    log("Incoming json:", request.json)
    details = request.json
    del details['_id']
    log("Updated record: ", details)
    try:
        valid_book = book_schema(details)
        pprint(valid_book)
        log('Book validated.')
    except Exception as ve:
        log(ve)
        return jsonify({'error': unicode(ve), 'status': 'Error'}), 415
    log("Updating with: ", valid_book)
    result = booksdb.update(book, valid_book)
    journal('UPDATE', {'in': valid_book, 'out': original})
    return jsonify({'result': result})
def get_book_details(book_id):
    result = booksdb.find_one({"_id": ObjectId(book_id)})
    result["_id"] = str(result["_id"])
    pprint(result)
    log("Returning book search for", book_id)
    return jsonify({'details': result})
def cleanFailedFilenames(failed_filenames):
    for filename in failed_filenames:
        log.log('Debug', 'File: ' + str(filename) + ' already exists.')
def write_mdf(*args, **kwds):
    # As an MDF file can only contain one lexicon, create as many MDF files as lexicons
    # in the lexical resource (TODO: rename files)
    for lexicon in args[0].get_lexicons():
        wrapper_rw(mdf_write, lexicon, *args[1:], **kwds)
        log("Successfully wrote %s LMF entries into MDF file '%s'." %
            (lexicon.count_lexical_entries(), args[1]))
from bson.objectid import ObjectId
from pprint import pprint
import json
import time

import pymongo
from flask import Flask, jsonify, request
from voluptuous import Schema, Optional, Required, Match

from utils.log import log
import isbntools

app = Flask(__name__, static_url_path='')

log("Beginning.")

db_host = 'localhost'
db_port = 27017
dbclient = pymongo.MongoClient(db_host, db_port)
db = dbclient['c-lib']
booksdb = db['books']
journaldb = db['journal']

book_schema = Schema({
    Optional('_id'): Match('^(?=[a-f\d]{24}$)(\d+[a-f]|[a-f]+\d)'),
    Required('authors'): unicode,
    Required('comment'): unicode,
    Required('coordinates'): [int, int],
    Required('created'): float,
def fail(filename, failed_directory_path, message):
    log.log("Error", message)
    safe_move(filename, failed_directory_path)
def add_book_by_isbn():
    isbnservice = "wcat"
    request.get_data()
    try:
        jsonstuff = json.loads(request.data)
    except Exception as e:
        log("JSON Decoding fail: ", e, request.data)
    if not jsonstuff or not 'isbn' in jsonstuff or \
            isbntools.notisbn(jsonstuff['isbn']):
        log("Invalid ISBN: ", jsonstuff)
        return jsonify({'status': "Invalid ISBN"}), 400
    else:
        isbn = str(jsonstuff['isbn'])
        log("ISBN entered:", isbn)
        book = booksdb.find_one({"isbn": isbn})
        if book:
            log("Book known")
            pprint(book)
            book['_id'] = str(book['_id'])
            return jsonify({'book': book, 'status': 'Book existant'}), 201
        try:
            meta = isbntools.meta(isbn, service=isbnservice)
            log("META found:", meta)
        except Exception as e:
            log("META not found: ", e)
            meta = {'Publisher': "Unknown",
                    'Language': "Unknown",
                    'Title': "Unknown",
                    'Authors': ["Unknown"],
                    'Year': "Unknown"}
            # TODO: Allow/offer manual entry
            return jsonify({'status': "Metaserver lookup failed."}), 504
        try:
            book = {
                'isbn': isbn,
                'publisher': unicode(meta['Publisher']),
                'language': unicode(meta['Language']),
                'title': unicode(meta['Title']),
                'authors': str(meta['Authors']),
                'year': int(meta['Year']),
                'created': time.time(),
                'modified': time.time(),
                'coordinates': [0, 0],
                'status': u'None',
                'tags': [],
                'comment': u''
            }
        except TypeError:
            return jsonify({'status': 'Error'}), 415
        log(book)
        try:
            valid_book = book_schema(book)
            pprint(valid_book)
            log('Book validated.')
        except Exception as ve:
            log(ve)
            return jsonify({'error': unicode(ve), 'status': 'Error'}), 415
        booksdb.insert(valid_book)
        valid_book['_id'] = str(valid_book['_id'])
        journal('ADD', {'in': valid_book, 'out': None})
        return jsonify({'book': book, 'status': 'Book created'}), 201
def read_config(*args, **kwds):
    lexical_resource = wrapper_rw(config_read, *args, **kwds)
    log("Successfully read config")
    return lexical_resource
def handle_noargs(self, **options):
    try:
        log('Debug', 'Running database population script')
        populate()
    except Exception as e:
        log('Error', 'Exception thrown from database population script: ' + str(e))
def run(db):
    print 'running'
    authorizationManager = AuthorizationManager()
    decrypted_path = settings.CONNECTORS['connector_funf']['config']['decrypted_path']
    load_failed_path = settings.CONNECTORS['connector_funf']['config']['load_failed_path']
    #TODO
    raw_filenames = [filename for filename in os.listdir(decrypted_path)
                     if fnmatch.fnmatch(filename, '*.orig')]
    anonymizerObject = Anonymizer()
    raw_filenames = raw_filenames[:settings.CONNECTORS['connector_funf']['config']['max_population_files']]
    filenames = [os.path.join(decrypted_path, filename) for filename in raw_filenames]
    print raw_filenames

    proc_dir = os.path.join(decrypted_path, 'processing')
    failed_filenames = []
    for f in filenames:
        try:
            shutil.move(f, proc_dir)
        except Exception as e:
            failed_filenames.append(os.path.basename(f))
    raw_filenames = [e for e in raw_filenames if e not in failed_filenames]
    filenames = [os.path.join(proc_dir, filename) for filename in raw_filenames]
    cleanFailedFilenames(failed_filenames)

    cursor = None
    documents_to_insert = defaultdict(list)
    filenames_to_remove = []
    nof_files = len(filenames)
    file_count = 0
    for filename in filenames:
        file_count += 1
        if not os.path.exists(filename):
            continue
        log.log('Debug', 'Populating to DB, file(%d/%d): %s' % (file_count, nof_files, filename))
        try:
            conn = sqlite3.connect(filename)
            cursor = conn.cursor()
        except Exception as e:
            fail.fail(filename, load_failed_path,
                      'Exception thrown:' + str(e) + '. While trying to open sqlite file: ' + filename)
            continue
        try:
            device = cursor.execute('select device from file_info').fetchone()[0]
            uuid = cursor.execute('select uuid from file_info').fetchone()[0]
            device_id = ''
            try:
                device_id = anonymizerObject.anonymizeValue(
                    'device_id', str(cursor.execute('select device_id from file_info').fetchone()[0]))
                # device_id = str(cursor.execute('select device_id from file_info').fetchone()[0])
            except Exception as e:
                fail.fail(filename, load_failed_path,
                          'Exception thrown: ' + str(e) + '. While trying to extract device_id from file: ' + filename)
                continue
            #TODO: replace device_id with token
            try:
                # user = anonymizerObject.anonymizeValue('user', authorizationManager.getAuthorizationForToken('connector_funf', 'all_probes', device_id)['user'])
                user = authorizationManager.getAuthorizationForToken('connector_funf', 'all_probes', device_id)['user']
            except KeyError:
                user = None
            if not user:
                log.log('Debug', 'User does not exist for device id: ' + str(device_id))
                fail.fail(filename, load_failed_path,
                          'No user found in database.\nDevice id: ' + str(device_id))
                continue
            for row in cursor.execute('select * from data'):
                name = row[1]
                timestamp = row[2]
                #TODO: separate this sanitization
                data_raw = row[3].replace('android.bluetooth.device.extra.DEVICE',
                                          'android_bluetooth_device_extra_DEVICE')
                data_raw = data_raw.replace('android.bluetooth.device.extra.NAME',
                                            'android_bluetooth_device_extra_NAME')
                data_raw = data_raw.replace('android.bluetooth.device.extra.CLASS',
                                            'android_bluetooth_device_extra_CLASS')
                data_raw = data_raw.replace('android.bluetooth.device.extra.RSSI',
                                            'android_bluetooth_device_extra_RSSI')
                data = json.loads(data_raw)
                doc = {}
                doc['_id'] = hashlib.sha1(json.dumps(data)).hexdigest() + '_' + user + '_' + str(int(timestamp))
                doc['uuid'] = uuid
                doc['device'] = device
                doc['device_id'] = device_id
                doc['user'] = user
                doc['probe'] = data['PROBE'].replace('.', '_')
                doc['data'] = anonymizerObject.anonymizeDocument(data, doc['probe'])
                doc['name'] = name
                doc['timestamp'] = float(timestamp)
                doc['timestamp_added'] = time.time()
                documents_to_insert[doc['probe']].append(doc)
        except Exception as e:
            fail.fail(filename, load_failed_path,
                      'Exception thrown: ' + str(e) + '. While extracting data from file: ' + filename)
            # traceback.print_exc(file=sys.stdout)
            continue
        cursor.close()
        log.log('Debug', 'Adding file to be populated')
        filenames_to_remove.append(filename)

    #TODO: make sure that the duplicates logic works
    for probe in documents_to_insert:
        try:
            db.insert(documents_to_insert[probe], probe)
        except Exception as e:
            # print 'problem!!!' + probe + ' '
            # traceback.print_exc(file=sys.stdout)
            pass
    for filename in filenames_to_remove:
        print "removing ", filename
        os.remove(filename)