def interpret_output(data_frame, col_name=col_name_class_out, new_col_name=col_name_result, storage_level=0, storage_name='', log=1):
    df = data_frame.copy()
    category_list = CategoryListHandler.read_categories()
    category_vectors = Vectorizer.get_word_vectors(category_list)
    df[new_col_name] = df.apply(lambda x: [ClassificationInterpreterCustom1.get_highest_similarity(
        x[col_name], category_list, category_vectors)], axis=1)
    log_text = 'Categories have been determined (' + str(len(df.index)) + ' entries).'
    if storage_level >= 1 and storage_name != '':
        storage_name = storage_name + ClassificationInterpreterCustom1.ext_categorized
        Storage.store_pd_frame(df, storage_name)
        log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
    if log:
        SessionLogger.log(log_text)
    return df
def create_out_vectors(data_frame, col_name=col_name_categories, new_col_name=new_col_name_cat_vec, storage_level=0, storage_name=''):
    df = data_frame.copy()
    df[ClassificationInterpreterCustom1.one_word_cat] = df.apply(
        lambda x: ClassificationInterpreterCustom1.extract_one_word_cat(x[col_name]), axis=1)
    vectorized_df = Vectorizer.vectorize(
        df, col_name=ClassificationInterpreterCustom1.one_word_cat,
        new_col_name=new_col_name, storage_level=0, log=0)
    vectorized_df = vectorized_df.drop(columns=[ClassificationInterpreterCustom1.one_word_cat])
    vectorized_df[new_col_name] = vectorized_df.apply(
        lambda x: (x[new_col_name] + 1) / 2, axis=1)  # adjust to softmax codomain
    log_text = 'Category vectors for classifier training have been created (' + str(len(data_frame.index)) + ' entries).'
    if storage_level >= 1 and storage_name != '':
        storage_name = storage_name + ClassificationInterpreterCustom1.ext_out_vecs
        Storage.store_pd_frame(vectorized_df, storage_name)
        log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
    SessionLogger.log(log_text)
    return vectorized_df
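# Hypothetical usage sketch (not from the original sources): chaining the two
# ClassificationInterpreterCustom1 helpers above around a classifier run. The frame
# identifiers and the assumption that these functions are exposed as static methods of
# ClassificationInterpreterCustom1 are illustrative only.
train_df = Storage.load_pd_frame('corpus_vectorized_train')                    # assumed identifier
train_outs = ClassificationInterpreterCustom1.create_out_vectors(train_df)
# ... train a classifier on train_outs, then classify a held-out frame ...
test_classified = Storage.load_pd_frame('corpus_vectorized_test_classified')   # assumed identifier
test_interpreted = ClassificationInterpreterCustom1.interpret_output(
    test_classified, storage_level=1, storage_name='corpus')                   # persists 'corpus' + ext_categorized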
def __init__(self, db_name):
    Storage.__init__(self)
    self._db_name = db_name
    DBUtil.execute_command(
        db_name,
        "CREATE TABLE IF NOT EXISTS Results (name TEXT PRIMARY KEY, value BLOB, started DATETIME, runtime FLOAT)"
    )
def remove_stopwords(data_frame, custom_stop_words=None, download_live_stopwords=0, col_name=col_name, storage_level=0, storage_name='', log=1): df = data_frame.copy() stop_words = StopwordHandler.read_stopwords() if download_live_stopwords: stop_words = stop_words.union( StopwordDownloaderNLTK.get_stopwords(store=0)) stop_words = StopWordRemoverCustom.capitalize_words(stop_words) if custom_stop_words is not None: stop_words = stop_words.union(custom_stop_words) df[StopWordRemoverCustom.new_col_name] = df.apply( lambda x: StopWordRemoverCustom.process_text( x[col_name], stop_words), axis=1) log_text = 'Removed stop words from documents (' + str(len( df.index)) + ' entries).' if storage_level >= 1 and storage_name != '': Storage.store_pd_frame(df, storage_name) log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + StopWordRemoverCustom.new_col_name + '\').' if log: SessionLogger.log(log_text) return df
def downloading(self, download_callback=None, download_complete=print): files = Files() while True: if not files.empty(): download_file = files.get() download_from_ip = download_file.file_from.ip download_from_card = download_file.file_from.essid download_url = 'http://%s/cgi-bin/wifi_download?fn=%s' % ( download_from_ip, download_file.get_path()) download_to = Storage().dir() if self.separate: download_to = os.path.join(download_to, download_from_card) if not os.path.exists(download_to): os.mkdir(download_to) download_to = os.path.join(download_to, download_file.get_name()) try: urllib.urlretrieve(download_url, download_to, download_callback) Storage().add(File(download_file.get_name())) if download_complete: download_complete(download_to) except urllib.ContentTooShortError: print('Oops!!') time.sleep(0.1)
def split_train_test(identifier=None, data_frame=None):
    if data_frame is None:
        data_frame = Storage.load_pd_frame(identifier)
    split_ratio = SessionConfigReader.read_value(TrainTestSplitterCustom1.split_ratio_key)
    if split_ratio > 1:
        split_ratio = 1
    random_state = SessionConfigReader.read_value(TrainTestSplitterCustom1.random_state_key)
    if isinstance(random_state, int):
        train = data_frame.sample(frac=split_ratio, random_state=random_state)
    else:
        train = data_frame.sample(frac=split_ratio)
    test = data_frame.drop(train.index)
    if identifier is None:
        identifier = SessionConfigReader.read_value(TrainTestSplitterCustom1.corpus_identifier_key)
    train_name = identifier + TrainTestSplitterCustom1.ext_train
    test_name = identifier + TrainTestSplitterCustom1.ext_test
    Storage.store_pd_frame(train, train_name)
    Storage.store_pd_frame(test, test_name)
    SessionLogger.log('Split \'' + identifier + '\' (' + str(len(data_frame.index)) + ' entries) into \''
                      + train_name + '\' (' + str(len(train.index)) + ' entries) and \''
                      + test_name + '\' (' + str(len(test.index)) + ' entries).')
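# Hypothetical usage sketch (not from the original sources): splitting a vectorized corpus
# and reloading the two stored parts. The identifier 'corpus_vectorized' and the split ratio
# configured in the session config are assumptions for illustration.
vectorized = Storage.load_pd_frame('corpus_vectorized')
TrainTestSplitterCustom1.split_train_test(identifier='corpus_vectorized', data_frame=vectorized)
train = Storage.load_pd_frame('corpus_vectorized' + TrainTestSplitterCustom1.ext_train)
test = Storage.load_pd_frame('corpus_vectorized' + TrainTestSplitterCustom1.ext_test)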
def create_session_configs(configs_location=None, delete_old_configs=1): if configs_location is None: configs_location = ConfigReader.get_configs_location() if delete_old_configs: Storage.delete_location(configs_location) configs = [SessionConfigReader.get_config_template()] configs = SessionConfigBuilderCustom1.add_all_config_info(configs) n_configs = len(configs) SessionLogger.log('Constructed ' + str(n_configs) + ' new session configs from template: \'' + ConfigReader.get_config_template_id() + '\'.') config_ids = list() idx = 0 for conf in configs: config_id = configs_location + '/' + SessionConfigBuilderCustom1.config_name + str( idx + 1) SessionConfigReader.set_config(conf, config_id) config_ids.append(config_id) idx = idx + 1 SessionLogger.log('Stored ' + str(n_configs) + ' session configs in \'' + configs_location + '\'.') return config_ids
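# Hypothetical usage sketch (not from the original sources): building the session configs from
# the template and iterating over the returned identifiers. The print is illustrative only.
config_ids = SessionConfigBuilderCustom1.create_session_configs()
for config_id in config_ids:
    print(config_id)   # e.g. '<configs_location>/<config_name>1', '<configs_location>/<config_name>2', ...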
def import_docs(csv_path=None): if csv_path is None: session_folder = os.path.join(TenKGnadImporter.sessions_folder, SessionConfigReader.get_session_id()) corpus_id = SessionConfigReader.read_value( TenKGnadImporter.corpus_id_key) corpus_id = DiskStorageMisc.get_identifier_path(corpus_id) csv_path = os.path.join(session_folder, corpus_id + TenKGnadImporter.csv_ext) df = pd.read_csv( csv_path, sep=';', quotechar='\'', quoting=csv.QUOTE_MINIMAL, header=None, names=[TenKGnadImporter.category_name, TenKGnadImporter.text_name]) category_list = df[TenKGnadImporter.category_name].tolist() df[TenKGnadImporter.category_name] = df.apply( lambda x: [x[TenKGnadImporter.category_name]], axis=1) head, f_name = os.path.split(csv_path) identifier = f_name.split('.')[0] Storage.store_pd_frame(df, identifier) SessionLogger.log('TenKGnad Corpus (' + str(len(df.index)) + ' entries) has been imported into \'' + identifier + '\' (columns: \'' + TenKGnadImporter.category_name + '\', \'' + TenKGnadImporter.text_name + '\').') category_set = set(category_list) category_list = list(category_set) CategoryListHandler.set_categories(category_list) return identifier
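# Hypothetical usage sketch (not from the original sources): importing the 10kGNAD corpus via
# the session config and reloading the stored frame. The identifier is derived from the CSV
# file name inside import_docs above.
identifier = TenKGnadImporter.import_docs()     # csv_path=None -> path resolved from the session config
corpus_df = Storage.load_pd_frame(identifier)
print(corpus_df[TenKGnadImporter.text_name].head())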
def sort(session_id=None):
    evals = EvaluationHandler.load_evaluations(session_id=session_id)
    evals = evals.sort_values(by=[EvaluationHandler.score_col], ascending=False)
    Storage.store_pd_frame(evals, EvaluationHandler.evaluations_id, session_id=session_id)
def create_storage(self): self.storage = Storage(self.system_parameters) layer_height = self.storage.layer_height storage_height = self.storage.height layer_pct = layer_height / storage_height nb_of_layers = self.system_parameters["nb_of_layers"] self.system_parameters["nb_time_step"] = len( self.system_parameters["ports_in"]["port_1"]["temp_in"]) nb_time_step = self.system_parameters["nb_time_step"] ports_in = self.system_parameters["ports_in"] ports_out = self.system_parameters["ports_out"] inlet_water_temp = self.system_parameters["inlet_water_temp"] # Add layers to the storage and initialize them self.storage.add_and_instantiate_layers(nb_of_layers, nb_time_step) # Match ports' height with layers self.storage.match_ports_height_with_layers(ports_in, ports_out, layer_pct) # Calculate a possible missing flow (to respect storage mass balance) self.storage.calculate_missing_flow(nb_time_step) self.storage.add_missing_flow_to_bottom_layer(nb_time_step, inlet_water_temp) # Calculate flows between layers self.storage.calculate_hydraulic_balance(nb_time_step)
def run(self): self.db = Storage() while True: table_name, json_data, extra_data = self.queue.get() try: if table_name == 'issue': #print('StorageActor issue = {}'.format(extra_data['id_issue'])) print(".", end="") self.db.insertIssue(id_issue = extra_data['id_issue'], id_project = extra_data['id_project'], issue_json = str(json_data['issue_json']), events_json = str(json_data['events_json']), comments_json = str(json_data['comments_json'])) elif table_name == 'issue_validate': #print('StorageActor issue_validate = {}'.format(extra_data['id_issue'])) if len(self.db.getIssue(id_project = extra_data['id_project'], id_issue = extra_data['id_issue']).fetchall()) == 0: self.queues['fetch'].put(('issue', extra_data)) elif table_name == 'project_validate': #print('StorageActor project_validate = {}'.format(extra_data['id_project'])) if len(self.db.getProject(id_project=extra_data['id_project']).fetchall()) == 0: self.db.insertProject(id_project=extra_data['id_project'], project_json=str(json_data['project_json'])) self.queues['fetch'].put(('project', extra_data)) else: raise BaseException('Unknown table_name in insert queue {}'.format(table_name)) self.queue.task_done() except BaseException as e: print('StorageActor Error') print(e) print(extra_data)
def curateTygem(kifuFolder, indexFolder, movesPerGame=1, totalMoves=1, previousStates=3):
    movesPerFile = 10000
    outFolder = 'outData/'
    outfilename = outFolder + input("Enter output file name: ")
    storage = Storage(outfilename, movesPerFile, previousStates)
    loader = FileLoader(kifuFolder, indexFolder)
    fileIndex = 0
    movesProcessed = 0
    startTime = time.time()
    bar = ProgressBar(totalMoves, width=60, fmt=ProgressBar.FULL)
    # TODO: Removed Handicap Games!!!!
    while movesProcessed < totalMoves:
        info, game = loader.next()
        mc = processGame(storage, info, game, movesPerGame, previousStates)
        movesProcessed += mc
        bar(mc)
    storage.writeToFile()
    endTime = time.time()
    # elapsed time is end minus start
    print('\nTotal time for ' + str(totalMoves) + ' moves: ', endTime - startTime)
def __init__(self, port=12345, DEBUG=True):
    Storage.__init__(self)
    self.port = port
    self.servers = {}
    self.scan_lock = threading.Lock()
    self.scan = False
    self.DEBUG = DEBUG
    self.findServers(async=True)
def saveToStorage(self, storage: Storage, shouldSaveFeatures):
    labels = self.labelsToOutputFormat(self.predictedLabels)
    storage.writeListToFile(labels, self.labelsPath)
    if shouldSaveFeatures:
        storage.writeObjsToPkl([self.boundingBoxes, self.laneLines], self.featuresPath)
    # TODO this may have weird interaction with videowidget?
    self.boundingBoxes = []
    self.laneLines = []
def remove_noise(data_frame, col_name=col_name, storage_level=0, storage_name='', log=1):
    df = data_frame.copy()
    df[NoiseRemoverCustom.new_col_name] = df.apply(lambda x: NoiseRemoverCustom.process_text(x[col_name]), axis=1)
    log_text = 'Removed noise from documents (' + str(len(df.index)) + ' entries).'
    if storage_level >= 1 and storage_name != '':
        Storage.store_pd_frame(df, storage_name)
        log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + NoiseRemoverCustom.new_col_name + '\').'
    if log:
        SessionLogger.log(log_text)
    return df
def run_classification_test(): corpus_id = SessionConfigReader.read_value(SetupRunner.corpus_id_key) vectorized_df_id = corpus_id + SetupRunner.ext_vectorized train_df_id = vectorized_df_id + SetupRunner.ext_train test_df_id = vectorized_df_id + SetupRunner.ext_test Storage.delete_pd_frame(train_df_id) Storage.delete_pd_frame(test_df_id) Storage.delete_h5_model(SessionConfigReader.read_value(SetupRunner.keras_nn_model_id_key)) vectorized_df = Storage.load_pd_frame(vectorized_df_id) TrainTestSplitter.split_train_test(identifier=vectorized_df_id, data_frame=vectorized_df) train_df_id = vectorized_df_id + SetupRunner.ext_train train = Storage.load_pd_frame(train_df_id) test_df_id = vectorized_df_id + SetupRunner.ext_test test = Storage.load_pd_frame(test_df_id) train_classification_outs = ClassificationInterpreter.create_out_vectors(train) Classifier.create_model(train_classification_outs) test_classified = Classifier.classify(test) test_interpreted = ClassificationInterpreter.interpret_output(test_classified) score = ClassificationInterpreter.evaluate_output(test_interpreted) EvaluationHandler.add_evaluation(score) return test_interpreted
def preprocess_texts(data_frame, col_name=col_name, new_col_name=col_name_preprocessed, storage_level=0, storage_name='', log=1): storage_name_ext = storage_name if storage_name != '': storage_name_ext = storage_name + TextPreprocessorCustom.ext_noise_removed noise_removed_df = NoiseRemover.remove_noise( data_frame, col_name=col_name, storage_level=storage_level - 1, storage_name=storage_name_ext, log=log) if storage_name != '': storage_name_ext = storage_name + TextPreprocessorCustom.ext_stops_removed stops_removed_df = StopWordRemover.remove_stopwords( noise_removed_df, col_name=TextPreprocessorCustom.col_name_noise_removed, storage_level=storage_level - 1, storage_name=storage_name_ext, log=log) if storage_name != '': storage_name_ext = storage_name + TextPreprocessorCustom.ext_lemmatized processed_texts_df = Lemmatizer.normalize( stops_removed_df, col_name=TextPreprocessorCustom.col_name_stops_removed, storage_level=storage_level - 1, storage_name=storage_name_ext, log=log) if storage_level <= 1: processed_texts_df = processed_texts_df.drop( columns=[TextPreprocessorCustom.col_name_noise_removed]) processed_texts_df = processed_texts_df.drop( columns=[TextPreprocessorCustom.col_name_stops_removed]) processed_texts_df = processed_texts_df.rename( columns={TextPreprocessorCustom.col_name_lemmatized: new_col_name}) log_text = 'Documents have been preprocessed (' + str( len(data_frame.index)) + ' entries).' if storage_level >= 1 and storage_name != '': Storage.store_pd_frame( processed_texts_df, storage_name + TextPreprocessorCustom.ext_preprocessed) log_text = log_text + ' Stored in \'' + storage_name + TextPreprocessorCustom.ext_preprocessed + '\' (column: \'' + new_col_name + '\').' if log: SessionLogger.log(log_text) return processed_texts_df
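# Hypothetical usage sketch (not from the original sources): running the full
# TextPreprocessorCustom pipeline above on an imported corpus. The identifier 'corpus'
# and the storage_level value are illustrative assumptions.
corpus_df = Storage.load_pd_frame('corpus')
preprocessed_df = TextPreprocessorCustom.preprocess_texts(
    corpus_df,
    storage_level=1,        # keep only the final frame, stored under 'corpus' + ext_preprocessed
    storage_name='corpus')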
def test_storage_object(self):
    """Storage Class Testing"""
    s = Storage()
    s.add('Naruto', 'TV', 'Shounen', 600, 24, 2006, "FALSE", 4, 8.7, 1258970, 4, 6, 1582963, 12574, 'Ninja Anime')
    self.assertEqual(s.get('Naruto'), 'Naruto')
    self.assertEqual(s.contains('Naruto'), True)
    s.delete('Naruto')
    self.assertEqual(s.contains('Naruto'), False)
def __init__(self):
    super(MainWindow, self).__init__()
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self)
    self.dataPoints = dict()
    self.classifier = ClassifierRunner()
    self.storage = Storage()
    self.labelsSaveFolder = None
    self.dialog = InfoDialog(self.dataPoints, parent=self)
    self.setupUi()
async def isComplete(message: types.Message): data = message.text[6:].split(' ') if len(data) != 1: await message.answer("Incorrect input for /done command, use /help") else: try: if (Storage.IsComplete(message.from_user.id, data)): Storage.TaskIsComplete(message.from_user.id, data) await message.answer("task with id: " + data[0] + " marked as completed") else: await message.answer("Check if id you entered is correct") except Exception as e: print(e) await message.answer("Check if id you entered is correct")
def on_press(self, key): try: # Write normal keys event = { "id": self.count, "kind": "key_pressed", "key": key.char, "delay": self.get_delay() } print("Key pressed: {0}".format(key)) self.events.append(event) self.count += 1 except AttributeError: if (key == keyboard.Key.esc ): # save recording and and stop listening Storage().write(self.events, "../macros/" + self.filename) self.keyboard_listener.stop() self.mouse_listener.stop() # Write Special Keys print(key) event = { "id": self.count, "kind": "special_key_pressed", "key": str(key), "delay": self.get_delay() } self.events.append(event) print('special key {0} pressed'.format(key)) self.count += 1
async def resetTable(message: types.Message):
    try:
        Storage.DeleteAll(message.from_user.id)
        await message.answer("Your storage was deleted!")
    except Exception as e:
        print(e)
        print("Can't delete your storage")
async def showForDeadline(message: types.Message):
    deadline = message.text[8:].split(' ')
    parsed = deadline[0].split('-')
    print(parsed)
    if len(deadline) != 1:
        await message.answer("Incorrect input for /showdl command, use /help")
    elif len(parsed) != 3:
        await message.answer("The date must consist of exactly three parts: YYYY-MM-DD")
    elif len(parsed[0]) != 4:
        await message.answer("Incorrect year format")
    elif len(parsed[1]) != 2 and int(parsed[1]) > 12:
        await message.answer("Incorrect month format")
    elif len(parsed[2]) != 2 and int(parsed[2]) > 31:
        await message.answer("Incorrect day format")
    else:
        try:
            id_date_valid = datetime.strptime(deadline[0], '%Y-%m-%d').date()
            data = Storage.GetForDeadline(message.from_user.id, deadline)
            parsedData = "Tasks to do before " + str(deadline[0]) + "\n\n"
            for text in data:
                parsedData += "Subject: " + str(text.name) + "\n(ID: " + str(text.ID) + ") task: " + str(text.task) \
                    + "\ncreated on: " + str(text.date_cr) + "\ndeadline is " + str(text.deadline) + "\n\n"
            await message.answer(parsedData)
        except Exception as e:
            print(e)
            await message.answer("Incorrect date format")
def add_evaluation(score, session_id=None):
    if session_id is None:
        session_id = ConfigReader.get_session_id()
    config_id = ConfigReader.get_config_id()
    evaluation_frame = Storage.load_pd_frame(EvaluationHandler.evaluations_id, session_id=session_id)
    timestamp_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row = len(evaluation_frame)
    evaluation_frame.at[row, EvaluationHandler.timestamp_col] = timestamp_str
    evaluation_frame.at[row, EvaluationHandler.session_id_col] = session_id
    evaluation_frame.at[row, EvaluationHandler.config_id_col] = config_id
    evaluation_frame.at[row, EvaluationHandler.score_col] = score
    Storage.store_pd_frame(evaluation_frame, EvaluationHandler.evaluations_id, session_id=session_id)
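# Hypothetical usage sketch (not from the original sources): appending a score to the
# evaluations frame and re-sorting it with the sort() helper shown earlier. The score value
# is an illustrative placeholder.
EvaluationHandler.add_evaluation(0.87)   # appends a timestamped row for the current session/config
EvaluationHandler.sort()                 # re-stores the evaluations frame ordered by score, best first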
def normalize(data_frame, col_name=col_name, storage_level=0, storage_name='', log=1):
    df = data_frame.copy()
    df[LemmatizerSpacyGerman.new_col_name] = df.apply(
        lambda x: LemmatizerSpacyGerman.process_text(x[col_name]), axis=1)
    log_text = 'Documents lemmatized with spacy (' + str(len(df.index)) + ' entries).'
    if storage_level >= 1 and storage_name != '':
        Storage.store_pd_frame(df, storage_name)
        log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + LemmatizerSpacyGerman.new_col_name + '\').'
    if log:
        SessionLogger.log(log_text)
    return df
def generate_data_storage(self, no_of_rounds):
    no_of_rounds = self.no_of_rounds
    data_storage = []
    for i in range(no_of_rounds):
        temp_storage = Storage(self.id, i)
        data_storage.append(temp_storage)
    return data_storage
class TaskData():
    global example_description

    def __init__(self, task_item=None, storage=None):
        """ This class is designed for development and implementation of storage for
        information that is related to a Task object. We want new fields in the database,
        and probably a new table for storing the data of particular tasks.
        Arguments:
            task_item   the Task object this data belongs to; required.
            storage     a Storage object that is used within the application. If not given,
                        a new db interface is created, which is undesirable for this kind of object.
        Task data must have
        """
        if not task_item:  # there is no task argument given?
            print('WHAT')
            # logger.log('CHAOTIC', 'No task_item given to TaskData constructor!')
            sys.exit()
        else:
            self.task_item = task_item
            task_uuid = task_item.dict['uuid']
            # task_uuid = task_item.__getattribute__('uuid')
            # task_uuid = task_item.uuid
        if not storage:
            self.storage = Storage()
        else:
            self.storage = storage
        # maybe check if a db-record with a description has been already created.
        self.task_item.description
def solver_scaled(I, dt, C, T): """ Solve 1D wave equation in dimensionless form. """ # Make a hash of the arguments import inspect, hashlib data = inspect.getsource(I) + '_' + str(dt) + '_' + \ str(C) + '_' + str(T) # Not fool proof: if x0 changes value, the source code of I # is still the same, and no recomputation takes place... hashed_input = hashlib.sha1(data).hexdigest() # Use joblib-based tool (class Storage) to store already # computed solutions in files cachedir = 'tmp_%s' % hashed_input is_computed = os.path.isdir(cachedir) print 'cachedir:', cachedir, is_computed storage = Storage(cachedir, verbose=0) def action(u, x, t, n): if n == 0: storage.save('x', x) storage.save('t', t) storage.save('u%d' % n, u) if is_computed: print 'No need to compute the numerical solution' return storage else: print 'Computing the numerical solution' solver_unscaled( I=I, V=0, f=0, c=1, L=1, dt=dt, C=C, T=T, user_action=action) return storage
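# Minimal sketch (not from the original sources) of the caching idea used in solver_scaled
# above: hash the arguments that define a run and reuse a directory named after that hash,
# so repeated calls with the same I, dt, C, T skip recomputation. Written for Python 3,
# unlike the Python 2 code above; the 'tmp_' prefix mirrors the original.
import hashlib
import inspect
import os

def cache_dir_for(I, dt, C, T):
    data = inspect.getsource(I) + '_' + str(dt) + '_' + str(C) + '_' + str(T)
    hashed_input = hashlib.sha1(data.encode('utf-8')).hexdigest()  # sha1 needs bytes in Python 3
    cachedir = 'tmp_%s' % hashed_input
    return cachedir, os.path.isdir(cachedir)  # (directory name, already computed?)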
def __init__(self, action):
    self.action = action
    self.site = wikipedia.getSite()
    self.afi = catlib.Category(self.site,
                               u'Категория:Википедия:Статьи для срочного улучшения')
    self.afi_list = []
    self.afi_list_title = []
    self.cache = Storage()
def __init__(self, config, eventSched, httpRequester, ownAddrFunc, peerId, persister, pInMeasure, pOutMeasure, peerPool, connBuilder, connListener, connHandler, choker, torrent, torrentIdent, torrentDataPath, version): ##global stuff self.config = config self.version = version self.peerPool = peerPool self.connBuilder = connBuilder self.connListener = connListener self.connHandler = connHandler self.choker = choker ##own stuff self.log = Logger('Bt', '%-6s - ', torrentIdent) self.torrent = torrent self.torrentIdent = torrentIdent self.log.debug("Creating object persister") self.btPersister = BtObjectPersister(persister, torrentIdent) self.log.debug("Creating measure classes") self.inRate = Measure(eventSched, 60, [pInMeasure]) self.outRate = Measure(eventSched, 60, [pOutMeasure]) self.inRate.stop() self.outRate.stop() self.log.debug("Creating storage class") self.storage = Storage(self.config, self.btPersister, torrentIdent, self.torrent, torrentDataPath) self.log.debug("Creating global status class") self.pieceStatus = PieceStatus(self.torrent.getTotalAmountOfPieces()) self.log.debug("Creating file priority class") self.filePrio = FilePriority(self.btPersister, self.version, self.pieceStatus, self.storage.getStatus(), self.torrent, torrentIdent) self.log.debug("Creating requester class") self.requester = Requester(self.config, self.torrentIdent, self.pieceStatus, self.storage, self.torrent) self.log.debug("Creating tracker requester class") self.trackerRequester = TrackerRequester(self.config, self.btPersister, eventSched, peerId, self.peerPool, ownAddrFunc, httpRequester, self.inRate, self.outRate, self.storage, self.torrent, self.torrentIdent, self.version) self.log.debug("Creating superseeding handler class") self.superSeedingHandler = SuperSeedingHandler(self.torrentIdent, self.btPersister, self.storage.getStatus(), self.pieceStatus) ##callbacks self.log.debug("Adding callbacks") self._addCallbacks() ##status self.state = 'stopped' self.started = False self.paused = True ##lock self.lock = threading.Lock()
async def send_welcome(message: types.Message):
    try:
        Storage.AddTable(message.from_user.id)
        await message.answer("Hello!\nI am a homework storage bot.\nTo begin type /help")
    except AppExceptions.Table_Already_Exist as error:
        print(error)
        await message.answer("Looks like you already have a table.\nType /help to see the list of all commands and their description.")
    except Exception as e:
        print(e)
        print("Unexpected error!")
def create_out_vectors(data_frame, col_name=col_name_categories, new_col_name=new_col_name_cat_vec, storage_level=0, storage_name=''):
    data_frame[new_col_name] = data_frame.apply(
        lambda x: ClassificationInterpreterCustom2.get_cat_vec(x[col_name]), axis=1)
    log_text = 'Category vectors for classifier training have been created (' + str(len(data_frame.index)) + ' entries).'
    if storage_level >= 1 and storage_name != '':
        storage_name = storage_name + ClassificationInterpreterCustom2.ext_out_vecs
        Storage.store_pd_frame(data_frame, storage_name)
        log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
    SessionLogger.log(log_text)
    return data_frame
def run(self):
    """Saves page to db"""
    db = Storage()
    db.execute("""CREATE TABLE IF NOT EXISTS quality
                  (name TEXT, templates INT, edits INT, len INT, cats INT, linked INT,
                   referenced INT, images INT, iwiki INT, sections INT, users INT)""")
    db.delete("quality", {"name": self.pagename})
    db.insert("quality", self.eval())
class ReplicsCounter(): def __init__(self): self.cache = Storage() self.cache.create("articles", \ {"oldid":"INT UNIQUE", "name":"TEXT", "ts":"DATE", "replics": "INT"}) def countPage(self, page): """Counts repics at AFI page""" sections = {} sect = None n = -1 # one line for header for s in page.getSections(): if sect != None: sections[sect] = (n, s[0]) sect = s[3] n = s[0] sections[sect] = (n, len(page.get())) # last one for s in sections: replics = -1 # one for header text = page.get()[sections[s][0]:sections[s][1]].splitlines() for line in text: sline = line.strip() if (len(sline) > 2): if sline[:2] != "{{" and sline[:-2] != "}}": replics += 1 #print "%s %s" % (replics, line) wikipedia.output( u"%s %s %s" % (s, sections[s], replics)) self.cache.execute(u'UPDATE articles SET replics = %s WHERE name = "%s";' % (replics, self.cache.quote(s))) def countCat(self, catname): cat = catlib.Category(wikipedia.getSite(), catname) for page in cat.articles(): print page self.countPage(page) def replicsPage(self, pagename): r = self.cache.findone('articles', {"name":pagename}, what = ["replics"]) if r == None: return "-" else: return r[0]
def __init__(self): """инициализируем и хреначим массив""" self.data = [] for i in xrange(0, 12): self.data.append([]) self.db = Storage() s = u"""SELECT name, templates, edits, len, cats, linked, referenced, images, iwiki, sections, users FROM quality ORDER BY name;""" re = self.db.execute(s) for l in re.fetchall(): #print l[0] for i in xrange(1, 11): self.data[i].append(l[i])
def remove(self):
    """ Removes this object from the database. It will still remain in memory, however,
        and can be resaved at a later time provided that the original reference is maintained."""
    storage = Storage()
    database = self._database
    collection = self._collection
    if database is None or collection is None:
        raise ValueError, "The object needs to be assigned a database and a collection."
    storage.getDatabase(database)
    storage.getCollection(collection)
    documents = storage.removeDocuments({"_id": self._id})
def save(self): """Save this object into the database with all its public attributes.""" # Can't save without a database or a table if self._database is None: raise ValueError, "No database has been selected." if self._collection is None: raise ValueError, "No collection has been selected." # Check private variables. We probably shouldn't store these. document = {} for key, value in self.__dict__.items(): key = key.replace("_"+self._type, "") if key.startswith("__"): continue document[key] = value # Let's store this object storage = Storage() storage.getDatabase(self._database) storage.getCollection(self._collection) storage.insertDocuments(document) self._id = document["_id"]
def getObjectsByKey(self, key, value, limit=None): """ This will retrieve documents from the database and collection specified by this object based on one of their keys and convert them to their proper Python object state. :param key: The key to select on. :param value: The value to search for. :param limit: The maximum amount of objects to return. Will return all results by default. :rtype: All the matching objects stored in the database. """ storage = Storage() database = self._database collection = self._collection if database is None or collection is None: raise ValueError, "The object needs to be assigned a database and a collection." storage.getDatabase(database) storage.getCollection(collection) documents = storage.getDocuments({key:value}, limit) objects = [ self.loadFromRawData( data ) for data in documents ] return objects
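# Hypothetical usage sketch (not from the original sources): how a subclass of the document
# base class above (name 'StorableObject' assumed) might round-trip through the Mongo-style
# Storage. The 'User' class, its attributes, and the database/collection names are illustrative.
class User(StorableObject):
    def __init__(self, name):
        self._type = "User"          # used by save() when stripping name-mangled private attributes
        self._database = "app_db"    # database this object persists to
        self._collection = "users"   # collection within that database
        self.name = name

user = User("alice")
user.save()                                          # stores the public attributes as a document
matches = user.getObjectsByKey("name", "alice", 1)   # fetch matching objects back
user.remove()                                        # removes the document; the object stays in memory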
finflag = Event() ann = [None] myid = 'M' + version.replace('.', '-') myid = myid + ('-' * (8 - len(myid))) + b2a_hex(sha(repr(time()) + ' ' + str(getpid())).digest()[-6:]) seed(myid) pieces = [info['pieces'][x:x+20] for x in xrange(0, len(info['pieces']), 20)] def failed(reason, errorfunc = errorfunc, doneflag = doneflag): doneflag.set() if reason is not None: errorfunc(reason) rawserver = RawServer(doneflag, config['timeout_check_interval'], config['timeout'], errorfunc = errorfunc, maxconnects = config['max_allow_in']) try: try: storage = Storage(files, open, path.exists, path.getsize) except IOError, e: errorfunc('trouble accessing files - ' + str(e)) return def finished(finfunc = finfunc, finflag = finflag, ann = ann, storage = storage, errorfunc = errorfunc): finflag.set() try: storage.set_readonly() except (IOError, OSError), e: errorfunc('trouble setting readonly at end - ' + str(e)) if ann[0] is not None: ann[0](1) finfunc() rm = [None] def data_flunked(amount, rm = rm, errorfunc = errorfunc, report_hash_failures = config['report_hash_failures']):
class Bt: def __init__(self, config, eventSched, httpRequester, ownAddrFunc, peerId, persister, pInMeasure, pOutMeasure, peerPool, connBuilder, connListener, connHandler, choker, torrent, torrentIdent, torrentDataPath, version): ##global stuff self.config = config self.version = version self.peerPool = peerPool self.connBuilder = connBuilder self.connListener = connListener self.connHandler = connHandler self.choker = choker ##own stuff self.log = Logger('Bt', '%-6s - ', torrentIdent) self.torrent = torrent self.torrentIdent = torrentIdent self.log.debug("Creating object persister") self.btPersister = BtObjectPersister(persister, torrentIdent) self.log.debug("Creating measure classes") self.inRate = Measure(eventSched, 60, [pInMeasure]) self.outRate = Measure(eventSched, 60, [pOutMeasure]) self.inRate.stop() self.outRate.stop() self.log.debug("Creating storage class") self.storage = Storage(self.config, self.btPersister, torrentIdent, self.torrent, torrentDataPath) self.log.debug("Creating global status class") self.pieceStatus = PieceStatus(self.torrent.getTotalAmountOfPieces()) self.log.debug("Creating file priority class") self.filePrio = FilePriority(self.btPersister, self.version, self.pieceStatus, self.storage.getStatus(), self.torrent, torrentIdent) self.log.debug("Creating requester class") self.requester = Requester(self.config, self.torrentIdent, self.pieceStatus, self.storage, self.torrent) self.log.debug("Creating tracker requester class") self.trackerRequester = TrackerRequester(self.config, self.btPersister, eventSched, peerId, self.peerPool, ownAddrFunc, httpRequester, self.inRate, self.outRate, self.storage, self.torrent, self.torrentIdent, self.version) self.log.debug("Creating superseeding handler class") self.superSeedingHandler = SuperSeedingHandler(self.torrentIdent, self.btPersister, self.storage.getStatus(), self.pieceStatus) ##callbacks self.log.debug("Adding callbacks") self._addCallbacks() ##status self.state = 'stopped' self.started = False self.paused = True ##lock self.lock = threading.Lock() ##internal functions - callbacks def _addCallbacks(self): ownStatus = self.storage.getStatus() self.persistentStatusCallback = self.config.addCallback((('storage', 'persistPieceStatus'),), ownStatus.enablePersisting) def _removeCallbacks(self): self.config.removeCallback(self.persistentStatusCallback) ##internal functions - start/pause/stop - common def _halt(self, targetState): if self.paused and targetState in ('shutdown', 'remove'): #stopping and already paused, only need to stop the tracker requester and the callbacks self.log.debug("Removing callbacks") self._removeCallbacks() self.log.debug("Stopping tracker requester") self.trackerRequester.stop() else: #either stopping, removing or shutdown and still running or loading self.log.debug("Aborting storage loading just in case") self.storage.abortLoad() if self.started: #were already running self.started = False if targetState == 'stop': self.log.debug("Pausing tracker requester") self.trackerRequester.pause() else: self.log.debug("Removing callbacks") self._removeCallbacks() self.log.debug("Stopping tracker requester") self.trackerRequester.stop() self.log.debug("Removing us from choker") self.choker.removeTorrent(self.torrentIdent) self.log.debug("Removing us from connection builder") self.connBuilder.removeTorrent(self.torrentIdent) self.log.debug("Removing us from connection listener") self.connListener.removeTorrent(self.torrent.getTorrentHash()) self.log.debug("Removing us from connection handler") 
self.connHandler.removeTorrent(self.torrentIdent) self.log.debug("Stopping transfer measurement") self.inRate.stop() self.outRate.stop() #shutdown/removal specific tasks which need to be done regardless of current status if targetState in ('shutdown', 'remove'): self.log.debug("Removing all infos related to us from connection pool") self.peerPool.clear(self.torrentIdent) if targetState == 'remove': self.log.debug('Removing all persisted objects of this torrent') self.btPersister.removeAll() ##internal functions - start/pause/stop - specific def _start(self, loadSuccess): try: if loadSuccess: #loading was successful, add to handlers self.log.debug("Reseting requester") self.requester.reset() self.log.debug("Starting transfer measurement") self.inRate.start() self.outRate.start() self.log.debug("Adding us to connection handler") self.connHandler.addTorrent(self.torrentIdent, self.torrent, self.pieceStatus, self.inRate, self.outRate, self.storage, self.filePrio, self.requester, self.superSeedingHandler) self.log.debug("Adding us to connection listener") self.connListener.addTorrent(self.torrentIdent, self.torrent.getTorrentHash()) self.log.debug("Adding us to connection builder") self.connBuilder.addTorrent(self.torrentIdent, self.torrent.getTorrentHash()) self.log.debug("Adding us to choker") self.choker.addTorrent(self.torrentIdent, self.storage.getStatus(), self.superSeedingHandler) self.log.debug("Starting tracker requester") self.trackerRequester.start() self.started = True self.state = 'running' except: #something failed - hard self.log.error("Error in load function:\n%s", logTraceback()) ##external functions - state def start(self): #called when torrent is started self.lock.acquire() if self.paused: self.paused = False if self.storage.isLoaded(): self.log.debug("Storage already loaded, skipping hashing") self._start(True) else: self.storage.load(self._start) self.state = 'loading' self.lock.release() def stop(self): #called when torrent is stopped self.lock.acquire() if not self.paused: self._halt('stop') self.paused = True self.state = 'stopped' self.lock.release() def shutdown(self): #called on shutdown self.lock.acquire() self._halt('shutdown') self.paused = False self.state = 'stopped' self.lock.release() def remove(self): #called when torrent is removed self.lock.acquire() self._halt('remove') self.paused = False self.state = 'stopped' self.lock.release() ##external functions - stats def getStats(self, wantedStats): self.lock.acquire() stats = {} if wantedStats.get('state', False): stats['state'] = self.state #connections if wantedStats.get('connections', False): stats.update(self.connHandler.getStats(self.torrentIdent, connDetails=True)) #files if wantedStats.get('files', False): stats['files'] = self.filePrio.getStats() #peers if wantedStats.get('peers', False) or wantedStats.get('connectionAverages', False): #get peer stats connAverages = wantedStats.get('connectionAverages', False) stats.update(self.peerPool.getStats(self.torrentIdent)) stats.update(self.connHandler.getStats(self.torrentIdent, connSummary=True, connAverages=connAverages)) stats.update(self.trackerRequester.getStats(trackerSummary=True)) #normalise peer stats if stats['connectedLeeches'] > stats['knownLeeches']: stats['knownLeeches'] = stats['connectedLeeches'] if stats['connectedSeeds'] > stats['knownSeeds']: stats['knownSeeds'] = stats['connectedSeeds'] if stats['knownLeeches'] + stats['knownSeeds'] > stats['knownPeers']: stats['knownPeers'] = stats['knownLeeches'] + stats['knownSeeds'] elif 
stats['knownLeeches'] + stats['knownSeeds'] < stats['knownPeers']: stats['knownLeeches'] += stats['knownPeers'] - stats['knownSeeds'] #generate additional conn stats if necessary if connAverages: if stats['knownSeeds'] == 0: stats['knownLeechesPerSeed'] = 0 else: stats['knownLeechesPerSeed'] = (stats['knownLeeches'] * 1.0) / stats['knownSeeds'] #pieces if wantedStats.get('pieceAverages', False): stats.update(self.pieceStatus.getStats(pieceAverages=True)) #progress stats if wantedStats.get('progress', False): stats.update(self.storage.getStats()) #requests if wantedStats.get('requests', False) or wantedStats.get('pieceAverages', False): reqDetails = wantedStats.get('requests', False) pieceAverages = wantedStats.get('pieceAverages', False) stats.update(self.connHandler.getRequesterStats(self.torrentIdent, requestDetails=reqDetails, pieceAverages=pieceAverages)) #tracker if wantedStats.get('tracker', False): stats.update(self.trackerRequester.getStats(trackerDetails=True)) if wantedStats.get('trackerStatus', False): stats.update(self.trackerRequester.getStats(trackerStatus=True)) #transfer stats if wantedStats.get('transfer', False): stats['inRawBytes'] = self.inRate.getTotalTransferedBytes() stats['outRawBytes'] = self.outRate.getTotalTransferedBytes() stats['inPayloadBytes'] = self.inRate.getTotalTransferedPayloadBytes() stats['outPayloadBytes'] = self.outRate.getTotalTransferedPayloadBytes() stats['inRawSpeed'] = self.inRate.getCurrentRate() stats['outRawSpeed'] = self.outRate.getCurrentRate() stats['protocolOverhead'] = (100.0 * (stats['inRawBytes'] + stats['outRawBytes'] - stats['inPayloadBytes'] - stats['outPayloadBytes'])) / max(stats['inPayloadBytes'] + stats['outPayloadBytes'], 1.0) if wantedStats.get('transferAverages', False): stats['avgInRawSpeed'] = self.inRate.getAverageRate() * 1024 stats['avgOutRawSpeed'] = self.outRate.getAverageRate() * 1024 stats['avgInPayloadSpeed'] = self.inRate.getAveragePayloadRate() * 1024 stats['avgOutPayloadSpeed'] = self.outRate.getAveragePayloadRate() * 1024 #torrent stats if wantedStats.get('torrent', False): stats.update(self.torrent.getStats()) stats['superSeeding'] = self.superSeedingHandler.isEnabled() self.lock.release() return stats ##external funcs - actions def setFilePriority(self, fileIds, priority): self.lock.acquire() for fileId in fileIds: self.filePrio.setFilePriority(fileId, priority) self.lock.release() def setFileWantedFlag(self, fileIds, wanted): self.lock.acquire() if self.started: #already running, need to go through the connection handler because of syncing issues self.connHandler.setFileWantedFlag(self.torrentIdent, fileIds, wanted) else: #not running for fileId in fileIds: self.filePrio.setFileWantedFlag(fileId, wanted) self.lock.release() def setSuperSeeding(self, enabled): self.lock.acquire() if not enabled == self.superSeedingHandler.isEnabled(): if self.started: self.connHandler.setSuperSeeding(self.torrentIdent, enabled) else: self.superSeedingHandler.setEnabled(enabled) self.lock.release() ##external funcs - tracker actions def getTrackerInfo(self): self.lock.acquire() trackerInfo = self.trackerRequester.getTrackerInfo() self.lock.release() return trackerInfo def setTrackerInfo(self, newTrackerInfo): self.lock.acquire() self.trackerRequester.setTrackerInfo(newTrackerInfo) self.lock.release() ##external funcs - other def getInfohash(self): self.lock.acquire() infohash = self.torrent.getTorrentHash() self.lock.release() return infohash
def __init__(self):
    self.cache = Storage()
    self.cache.create("articles",
                      {"oldid": "INT UNIQUE", "name": "TEXT", "ts": "DATE", "replics": "INT"})
class EvernoteController: def __init__(self): if DEV_TOKEN: self.token = DEV_TOKEN else: self.token = Oauth(SANDBOX).oauth() sys.stdout.write('Logging\r') if SANDBOX: self.client = EvernoteClient(token=self.token) else: self.client = EvernoteClient(token=self.token, service_host=SERVICE_HOST) self.userStore = self.client.get_user_store() self.noteStore = self.client.get_note_store() if LOCAL_STORAGE: self.__set_storage() print 'Login Succeed as ' + self.userStore.getUser().username def __set_storage(self): print 'Loading Storage' self.storage = Storage(self.noteStore, self.token) print 'Storage loaded' def create_notebook(self,title): notebook = Types.Notebook() notebook.name = title notebook = self.noteStore.createNotebook(notebook) if LOCAL_STORAGE: self.storage.create_notebook(notebook) print_line('Created notebook: %s successfully'%title) def create_note(self, title, content, notebook = None): note = Types.Note() note.title = title note.content = '<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">' note.content += content #'<en-note>Hello, world!</en-note>' if notebook: note.notebookGuid = self.myfile(notebook).guid note = self.noteStore.createNote(note) if LOCAL_STORAGE: self.storage.create_note(note, notebook) print_line('Created note: %s successfully' %title) def move_note(self, note, _to): if type(self.myfile(note)) != type(Types.Note()) or type(self.myfile(_to)) != type(Types.Notebook()): raise Exception('Type Error') self.noteStore.copyNote(self.token, self.myfile(note).guid, self.myfile(_to).guid) if SPECIAL_DEV_TOKEN: self.noteStore.expungeNote(self.token, self.myfile(note).guid) else: self.noteStore.deleteNote(self.token, self.myfile(note).guid) if LOCAL_STORAGE: self.storage.move_note(note, _to) print_line('Move %s to %s successfully'%(note,_to)) def delete_note(self, note): if type(self.myfile(note)) != type(Types.Note()): raise Exception('Types Error') self.noteStore.deleteNote(self.token, self.myfile(note).guid) # BUG if LOCAL_STORAGE: self.storage.delete_note(note) print_line('Deleted %s successfully'%note) def delete_notebook(self, notebook): if SPECIAL_DEV_TOKEN: if type(self.myfile(notebook)) != type(Types.Notebook()): raise Exception('Types Error') self.noteStore.expungeNotebook(self.token, self.myfile(notebook).guid) # BUG if LOCAL_STORAGE: self.storage.delete_notebook(notebook) print_line('Deleted %s successfully'%notebook) def myfile(self, s): if LOCAL_STORAGE: return self.storage.myfile(s) f = s.split('/') if '/' in s: for nb in self.noteStore.listNotebooks(): if nb.name == f[0]: fi = NoteStore.NoteFilter() fi.notebookGuid = nb.guid for ns in self.noteStore.findNotes(self.token, fi, 0, 999).notes: if ns.title == f[1]: return ns else: for nb in self.noteStore.listNotebooks(): if nb.name == f[0]: return nb raise Exception('%s not found'%s) def show_notebook(self): if LOCAL_STORAGE: self.storage.show_notebook() else: for nb in self.noteStore.listNotebooks(): print_line(nb.name) def show_notes(self, notebook=None): if LOCAL_STORAGE: self.storage.show_notes(notebook) else: for nb in self.noteStore.listNotebooks(): if not notebook: print_line(nb.name + ':') if not notebook or nb.name == notebook: f = NoteStore.NoteFilter() f.notebookGuid = nb.guid for ns in self.noteStore.findNotes(self.token, f, 0, 999).notes: print_line(('' if notebook else ' ') + ns.title)
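# Hypothetical usage sketch (not from the original sources): driving the EvernoteController
# above. The notebook and note names are illustrative placeholders.
ec = EvernoteController()
ec.create_notebook('Diary')
ec.create_note('2016-01-01', '<en-note>Hello, world!</en-note>', 'Diary')
ec.show_notes('Diary')
ec.move_note('Diary/2016-01-01', 'Archive')   # assumes an 'Archive' notebook already exists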
def __set_storage(self):
    print 'Loading Storage'
    self.storage = Storage(self.noteStore, self.token)
    print 'Storage loaded'
from fakeopen import FakeOpen
from Storage import Storage

files = (('file_a', 80), ('file_b', 32), ('file_c', 32), ('file_d', 32))
fs = FakeOpen()
storage = Storage(files, fs.open, fs.exists, fs.getsize)

length = 64
print '[0, 64)=%s' % storage._intervals(0 * length, length)
print '[64, 128)=%s' % storage._intervals(1 * length, length)
print '[128, 192)=%s' % storage._intervals(2 * length, length)

import string

# Create 64 bytes of data.
data_length = 64
written_data = string.printable[:data_length]

# Write this data starting at a global offset of 64.
offset = 64
storage.write(offset, written_data)

# Read this data back starting at a global offset of 64.
read_data = storage.read(offset, data_length)
print 'written_data == read_data? %s' % (written_data == read_data)
#utility method to set up GPIO used by sensors attached to the pi #called at the beginning of __main__ def setupGPIO(): os.system("sudo modprobe w1-therm") os.system("sudo modprobe w1-gpio") GPIO.setmode(GPIO.BCM) GPIO.setup(17, GPIO.IN, GPIO.PUD_UP) #need to set up filepath and filename when we get config in __main__ #used for all classes and threads in this file for logging purposes logger = FileOperations() #Need to setup actual minFreeMB once we get config data in __main__ #used by recording threads to check if there is enough room on the pi to record data storage = Storage() storage.setLogger(logger) #parses data in config file and returns a map of data entries to values def readConfig(): configDict = {} #finding and opening config file #parses config file with built in python config parser local_file_path = os.path.dirname(os.path.realpath(__file__)) + '/' config = ConfigParser.ConfigParser() config.readfp(open(local_file_path + 'config')) base_data_directory = os.path.join(config.get('Saving', 'base_data_directory'), "") configDict['base_data_directory'] = base_data_directory
class Corellations(): """Рассчитать и распечатать статистические данные Количество статей Средние величины Корелляции see http://stackoverflow.com/questions/3949226/calculating-pearson-correlation-and-significance-in-python""" def average(self, x): assert len(x) > 0 return float(sum(x)) / len(x) def sq_avg(self, x): s = 0 for i in x: s+=i*i return math.sqrt((s/len(x))) def sq_dev(self, x): s = 0 avg = self.average(x) for i in x: s += (i-avg)**2 return math.sqrt((s/len(x))) def pearson_def(self, x, y): assert len(x) == len(y) n = len(x) assert n > 0 avg_x = self.average(x) avg_y = self.average(y) diffprod = 0 xdiff2 = 0 ydiff2 = 0 for idx in range(n): xdiff = x[idx] - avg_x ydiff = y[idx] - avg_y diffprod += xdiff * ydiff xdiff2 += xdiff * xdiff ydiff2 += ydiff * ydiff return diffprod / math.sqrt(xdiff2 * ydiff2) def __init__(self): """инициализируем и хреначим массив""" self.data = [] for i in xrange(0, 12): self.data.append([]) self.db = Storage() s = u"""SELECT name, templates, edits, len, cats, linked, referenced, images, iwiki, sections, users FROM quality ORDER BY name;""" re = self.db.execute(s) for l in re.fetchall(): #print l[0] for i in xrange(1, 11): self.data[i].append(l[i]) def print_stats(self): #print self.data stats = u"Articles count %s \r\n" % len(self.data[1]) val = ["", "templ", "edi", "len", "cat", "links", "refs", "img", "iwiki", "sect", "users"] stats += " math avg root mean deviation max min \r\n" for i in xrange(1, 11): stats += "%8s: %-12.10g %-12.10g %-12.10g %8g %6g \r\n"% (val[i], self.average(self.data[i]), self.sq_avg(self.data[i]), self.sq_dev(self.data[i]), max(self.data[i]), min(self.data[i])) r = "" stats += "\r\n" stats += "Corellations table \r\n" for v in val: r += "%10s"%(v) stats += r+"\r\n" r = "" p = {} for i in xrange(1, 11): for j in xrange(1, 11): d = self.pearson_def(self.data[i], self.data[j]) r+="%-10.4g " % d if i > j: p["%s-%s"%(val[i], val[j])] = d stats += "%8s %s\r\n"%(val[i], r) r="" stats += "\r\n" stats += " Maximum values | Minimum values \r\n" up = sorted(p.items(), key=lambda x: -abs(x[1])) #print up[0] for l in xrange(0, 12): stats += "%12s %6.12s | %12s %6.12s \r\n" % (up[l][0], up[l][1], up[-l-1][0], up[-l-1][1]) return stats def print_sel(self): """Распечатываем максимальные и минимальные статьи""" k = (1000, 1000, 1, 1000, 1000, 1000, 1000, 1000, 1000, 1000) s = """SELECT name, ((templates * %s) + (edits * %s) + (len * %s) + (cats * %s) + (linked * %s) + (referenced * %s) + (images * %s) + (iwiki * %s) + (sections * %s) + (users * %s)) AS value FROM quality ORDER BY value ASC LIMIT 10;""" % k re = self.db.execute(s) for l in re.fetchall(): print "%s %s" % l print "------------" s = """SELECT name, ((templates * %s) + (edits * %s) + (len * %s) + (cats * %s) + (linked * %s) + (referenced * %s) + (images * %s) + (iwiki * %s) + (sections * %s) + (users * %s)) AS value FROM quality ORDER BY value DESC LIMIT 10;""" % k re = self.db.execute(s) for l in re.fetchall(): print "%s %s" % l
class AllAFI: """module for AFI stats update""" def __init__(self, action): self.action = action self.site = wikipedia.getSite() self.afi = catlib.Category(self.site, \ u'Категория:Википедия:Статьи для срочного улучшения') self.afi_list = [] self.afi_list_title = [] self.cache = Storage() def load_all(self): """Loads all articles for improvement to sqlite table""" self.cache.create('category', {'name':'TEXT', 'cat':'TEXT'}) self.cache.delete('category') self.afi_list = self.afi.articlesList() self.afi_list_title = [self.cache.quote(_.title(withNamespace=False)) for _ in self.afi.articlesList()] for a in self.afi_list: wikipedia.output(a) for cat in a.categories(): self.cache.insert('category', (a.title(withNamespace=False), cat.title(withNamespace=False))) # now clear articles table from non-actual articles re = self.cache.cursor.execute(u"SELECT name FROM articles;") for l in re.fetchall(): if l[0] not in self.afi_list_title: wikipedia.output(l[0]) self.cache.delete('articles', {'name':l[0]}) def update_stats(self): """prints stats to wikipedia page""" text = "" n1 = self.cache.cursor.execute("SELECT count(DISTINCT name) FROM category;").fetchone()[0] n2 = self.cache.cursor.execute("SELECT count(*) FROM articles;").fetchone()[0] text += u"Всего статей на КУЛ: '''%s''', статей в базе бота '''%s''' \r\n" % (n1, n2) re = self.cache.cursor.execute("SELECT cat, count(*) AS c FROM category GROUP BY cat HAVING c>10 ORDER BY c DESC;") text += u"== Топ категорий <ref>Категории, в которых более 10 статей на улучшении, количество статей указано в скобках</ref> == \r\n" for l in re.fetchall(): text += u"* [[:Категория:%s|]]: (%s) \r\n" % l text += u"== Самые старые статьи <ref>Учитывается самая первая номинация КУЛ</ref> == \r\n" re = self.cache.cursor.execute(u"SELECT name, ts FROM articles ORDER BY ts limit 20;") for l in re.fetchall(): text += u"* [[%s]] (%s) \r\n" % l re = self.cache.cursor.execute("SELECT count(*), replics FROM articles GROUP BY replics;") text += u"== По количеству реплик == \r\n" for l in re.fetchall(): text += u"* Обсуждения %s статей имеют %s реплик\r\n" % (l) re = self.cache.cursor.execute("SELECT topic, topic, n, ts FROM updates ORDER BY n DESC;") text += u"== Последние обновления == \r\n" for l in re.fetchall(): text += u"* [[Википедия:К улучшению/Тематические обсуждения/%s|%s]]: (Статей %s, обновлена %s) \r\n" % (l) text += u"== Примечания ==\r\n{{примечания}}" P = wikipedia.Page(self.site, u"Википедия:К улучшению/Тематические обсуждения/Статистика") P.put(text, u"Обновление статистики", botflag = True) def run(self): """entry point""" if self.action == "all": self.load_all() self.update_stats()