def run(self):
    try:
        fact = DataStore()
        fact.set_ds_type(self.ds_type)
        ds = fact.create_data_store()
        ds.set_user(self.params["user"])
        ds.set_password(self.params["password"])
        ds.set_host(self.params["host"])
        # ds.set_port(self.params["port"])  # use the default system port
        ds.set_db(self.params["db_name"])
        ds.set_table(self.params["table"])
        ds.connect()
        data = ds.get_all_data()
        self.fill_table.emit(data)
        self.sleep(2)
    except Exception as err:
        print(err)
    finally:
        self.t_finish.emit()
def __init__(self, yt_resource: Resource) -> None:
    """
    :param yt_resource: googleapiclient.discovery.Resource object
    """
    self._yt_resource = yt_resource
    self._channel_store = DataStore()
    self._subscribers = []
def run(self):
    try:
        fact = DataStore()
        fact.set_ds_type(self.f_type)
        ds = fact.create_data_store()
        if self.f_type in ("json", "csv"):
            ds.set_file(self.params[self.f_type])
        if self.f_type in ("postgre", "mongo"):
            ds.set_user(self.params["user"])
            ds.set_password(self.params["password"])
            ds.set_host(self.params["host"])
            # ds.set_port(self.params["port"])  # use the default system port
            ds.set_db(self.params["db_name"])
            ds.set_table(self.params["table"])
        ds.connect()
        for row in self.data:
            ds.insert_unique(*row)
        self.sleep(2)
    except Exception as err:
        print(err)
    finally:
        self.t_finish.emit()
class WebSpider(scrapy.Spider):
    name = "web"

    def __init__(self):
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.dstore = DataStore()

    # Scrapy's method to start crawling
    def start_requests(self):
        # Seed URLs
        urls = [
            'https://es.wikipedia.org/wiki/Procesamiento_de_lenguajes_naturales',
            'https://es.wikipedia.org/wiki/Aprendizaje_autom%C3%A1tico',
            'https://es.wikipedia.org/wiki/B%C3%BAsqueda_y_recuperaci%C3%B3n_de_informaci%C3%B3n',
            'https://es.wikipedia.org/wiki/Modelo_de_espacio_vectorial'
        ]
        # Set scraped count to 0 before crawling starts
        self.count = 0
        # Start the crawling process
        for u in urls:
            yield scrapy.Request(url=u, callback=self.parse)

    # Crawling algorithm
    def parse(self, response):
        '''
        This method is called repeatedly to process documents from the URL frontier.
        Scrapy handles compliance with politeness policies.
        '''
        url = response.request.url
        # Remove HTML tags from the document
        raw_text = GetText(response.body)
        # Preprocess the document's content
        tokens = Preprocess(raw_text)
        # Add the document to local storage
        if self.count < LIMIT:
            self.dstore.add_document(tokens, response.body, url)
        # Extract URL references and add them to the URL frontier
        for a in response.css('a'):
            if 'href' in a.attrib:
                yield response.follow(a, callback=self.parse)
        # Limit of pages to crawl
        if self.count > LIMIT:
            raise CloseSpider(reason='reached_limit')  # Force spider to close
        print(str(self.count) + '\n\n')  # IGNORE/COMMENT THIS
        self.count += 1

    def spider_closed(self, spider):
        # Store scraped documents when the spider finishes crawling
        self.dstore.store_data()
def index(name=None):
    db = DataStore()
    settings = db.get_settings()
    db.shutdown()
    min_temp = math.floor(settings['target_temp'] - temp_range / 2)
    max_temp = math.ceil(settings['target_temp'] + temp_range / 2)
    return render_template('index3.html', min_temp=min_temp, max_temp=max_temp, **settings)
def insert(self, title, category, description, date, site_url, url, image_url):
    try:
        conn = self.data_store.get_connection()
        entry_existing = r.table(self.db_article_table) \
            .get_all(title, index="title") \
            .count() \
            .run(conn) > 0
        # Only insert articles that are not already stored under this title
        if entry_existing:
            return
        return r.table(self.db_article_table).insert({
            'title': title,
            'category': category,
            'description': description,
            'date': date,
            'site_url': site_url,
            'url': url,
            'image_url': image_url,
            'synchronised_to_discord': False
        }).run(conn)
    finally:
        DataStore.disconnect(conn)
def __init__(self, yt_resource: Resource, playlist_id) -> None:
    """
    :param yt_resource: googleapiclient.discovery.Resource object
    :param playlist_id: youtube playlist ID
    """
    self._yt_resource = yt_resource
    self._playlist_id = playlist_id
    self._videos_store = DataStore()
def get_unsynchronised(self):
    try:
        conn = self.data_store.get_connection()
        return r.table(self.db_article_table) \
            .filter({'synchronised_to_discord': False}) \
            .run(conn)
    finally:
        DataStore.disconnect(conn)
def __init__(self, yt_resource: Resource, channel_id) -> None:
    """
    :param yt_resource: googleapiclient.discovery.Resource object
    :param channel_id: youtube channel ID
    """
    self._yt_resource = yt_resource
    self._channel_id = channel_id
    self._playlist_store = DataStore()
def settings():
    _settings = request.get_json()
    db = DataStore()
    logging.getLogger('pyro').debug('Saving new settings: %s' % _settings)
    db.save_profile(_settings)
    db.set_active_profile(_settings['id'])
    db.apply_active_profile()
    db.save_settings(_settings)
    return 'ok'
def test_get_path(self):
    """Test retrieving a file path."""
    s = DataStore()
    result = s.get_path(self.fname)
    assert isinstance(result, str)
    assert os.fspath(self.data_path / self.fname) == result
    with pytest.raises(ValueError, match=r"No file found named"):
        s.get_path("693_UNCI")
def __init__(self):
    config = Configuration()
    self.data_store = DataStore()
    self.db_host = config.get_db_host()
    self.db_port = config.get_db_port()
    self.db_username = config.get_db_username()
    self.db_password = config.get_db_password()
    self.db_name = config.get_db_name()
    self.db_article_table = config.get_article_table()
def __init__(self, *args, **kwargs):
    super(StudentPracticePairTests, self).__init__(*args, **kwargs)
    self.ds = DataStore()
    self.ds.read_students_from_csv_file("data/test_data/students.csv")
    self.ds.read_practices_from_csv_file("data/test_data/practices.csv")
    self.pair = StudentPracticePair(
        self.ds.df_students.loc['S001'],
        self.ds.df_practices.loc['P001'],
        address_path="data/test_data/addresses.csv")
def index(name=None):
    db = DataStore()
    settings = db.get_settings()
    db.shutdown()
    min_temp = math.floor(settings['target_temp'] - temp_range / 2)
    max_temp = math.ceil(settings['target_temp'] + temp_range / 2)
    return render_template('index.html', min_temp=min_temp, max_temp=max_temp, **settings)
def history(name=None):
    db = DataStore()
    settings = db.get_settings()
    db.shutdown()
    tr = request.args.get('tr')
    if tr is None:
        tr = 8
    min_temp = math.floor(settings['target_temp'] - temp_range / 2)
    max_temp = math.ceil(settings['target_temp'] + temp_range / 2)
    return render_template('history.html', tr=tr, min_temp=min_temp, max_temp=max_temp, **settings)
def set_synchronised(self, unsynchronised_items):
    try:
        conn = self.data_store.get_connection()
        for item in unsynchronised_items:
            r.table(self.db_article_table) \
                .get(item['id']) \
                .update({"synchronised_to_discord": True}) \
                .run(conn)
    finally:
        DataStore.disconnect(conn)
def __init__(self):
    QCoreApplication.setOrganizationName('github.com/misdoro')
    QCoreApplication.setApplicationName('Battery tester')
    self.threadpool = QThreadPool()
    self.instr_thread()
    self.datastore = DataStore()
    signal.signal(signal.SIGTERM, self.terminate_process)
    signal.signal(signal.SIGINT, self.terminate_process)
    self.data_receivers = set()
    GUI(self)
def execute_pipeline():
    """
    Executes and calls all necessary functions to run the program.
    :return:
    """
    api_key = get_api_key()
    # Create datastore
    ds = DataStore()
    # Read csv files
    ds.read_practices_from_csv_file("data/practices.csv")
    ds.read_students_from_csv_file("data/students.csv")
    # Create address combination file
    ds.create_address_csv_file()
    # Fetch distance and duration from the Google API
    GoogleAPI(api_key=api_key).fetch_distances_from_api('data/addresses.csv')
    # Create all weight combination file
    weight_df = weight_combination.create_all_weight_combinations(ds)
    # Extract best possible weight combinations
    weight_combination.extract_best_weights_students(ds, weight_df)
class StudentPracticePairTests(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        super(StudentPracticePairTests, self).__init__(*args, **kwargs)
        self.ds = DataStore()
        self.ds.read_students_from_csv_file("data/test_data/students.csv")
        self.ds.read_practices_from_csv_file("data/test_data/practices.csv")
        self.pair = StudentPracticePair(
            self.ds.df_students.loc['S001'],
            self.ds.df_practices.loc['P001'],
            address_path="data/test_data/addresses.csv")

    def test_fetch_travel_duration(self):
        dur = self.pair._fetch_travel_duration(self.pair.student['address'], has_car=0)
        self.assertEqual(dur, 2207)

    def test_fetch_durations_for_all_addresses(self):
        self.assertEqual(len(self.pair.durations.keys()), 3)
        self.assertEqual(self.pair.durations['main_duration0'], 2207)

    def test_find_intersecting_specialities(self):
        spec = self.pair.find_intersecting_specialities()
        self.assertEqual(spec[0], 'notfallmedizin')

    def test_has_children(self):
        has_children = self.pair.has_children()
        self.assertEqual(has_children, self.ds.df_students.loc['S001']['hasChildren'])

    def test_get_student_address(self):
        pair = StudentPracticePair(
            self.ds.df_students.loc['S001'],
            self.ds.df_practices.loc['P001'],
            address_path="data/test_data/addresses.csv")
        self.assertEqual(pair.get_student_address(), "Im Wörth 8, 60433 Frankfurt am Main")

    def test_get_practice_address(self):
        self.assertEqual(self.pair.get_practice_address(),
                         "Rhaban-Fröhlich-Straße 11, 60433 Frankfurt am Main")

    def test_get_fastest_transport_duration(self):
        dur = self.pair.get_fastest_transport_duration()
        self.assertEqual(dur, 116)

    def test_get_fastest_transport_mode(self):
        mode = self.pair.get_fastest_transport_mode()
        self.assertEqual(mode, "bicycle")

    def test_requires_relocation(self):
        self.assertEqual(self.pair.requires_relocation(), "Alternative 2")
def history(name=None):
    db = DataStore()
    settings = db.get_settings()
    db.shutdown()
    tr = request.args.get('tr')
    if tr is None:
        tr = 8
    min_temp = math.floor(settings['target_temp'] - temp_range / 2)
    max_temp = math.ceil(settings['target_temp'] + temp_range / 2)
    return render_template('history.html', tr=tr, min_temp=min_temp, max_temp=max_temp, **settings)
def main():
    print(" \n NHL Player Predictions\n", "=" * 23)
    print("Legend:", "LW = Left Wing,", "RW = Right Wing,", "C = Center,", "D = Defenseman")

    db_data = DataStore()
    df = db_data.fetch_all()
    df.drop(columns=['id'], inplace=True)
    df.drop(columns=['team'], inplace=True)

    # Hold out a small sample of players to predict on later
    play_sample = df.sample(7)
    df.drop(columns=['name'], inplace=True)
    df.drop(play_sample.index, inplace=True)

    X = np.array(df.drop(columns=['position']))
    y = np.array(df['position'])
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.3)

    clf = neighbors.KNeighborsClassifier()
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)

    print("Number of Data Points:", len(df.index), "\n")
    print("\nPredicted with Accuracy: ", accuracy * 100, "%\n")

    # Prediction test
    names = np.array(play_sample['name'])
    positions = np.array(play_sample['position'])
    players = dict(zip(names, positions))
    play_sample.drop(columns=['name'], inplace=True)
    play_sample.drop(columns=['position'], inplace=True)

    X_play = np.array(play_sample)
    predictions = clf.predict(X_play)

    outcome = []
    for i in range(len(predictions)):
        if predictions[i] == positions[i]:
            outcome.append("PASS")
        else:
            outcome.append("FAIL")

    output = pd.DataFrame(
        data=np.column_stack((predictions, positions, outcome)),
        index=names,
        columns=["Predicted", "Actual", "Outcome"])
    print(output)
class YTPlaylist:

    def __init__(self, yt_resource: Resource, playlist_id) -> None:
        """
        :param yt_resource: googleapiclient.discovery.Resource object
        :param playlist_id: youtube playlist ID
        """
        self._yt_resource = yt_resource
        self._playlist_id = playlist_id
        self._videos_store = DataStore()

    def fetch_videos(self):
        with alive_bar(manual=True, bar='smooth', spinner='dots_reverse') as bar:
            nextPageToken = None
            page = 1
            while True:
                pl_item_request = self._yt_resource.playlistItems().list(
                    part='snippet',
                    playlistId=self._playlist_id,
                    pageToken=nextPageToken)
                pl_item_response = pl_item_request.execute()

                for item in pl_item_response['items']:
                    self._videos_store.update(title=item['snippet']['title'], id='#')

                bar(page / ceil(pl_item_response['pageInfo']['totalResults']
                                / pl_item_response['pageInfo']['resultsPerPage']))
                page += 1

                nextPageToken = pl_item_response.get('nextPageToken')
                if not nextPageToken:
                    break
        print()

    def print_videos(self):
        print_heading('<< VIDEOS IN THE PLAYLIST >>')
        self._videos_store.print()

    def get_videos_serial(self):
        # {Title: serial_number}
        video_serial = {}
        i = 1
        for cache_unit in self._videos_store.list():
            video_serial.update({cache_unit['title']: i})
            i += 1
        return video_serial
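A minimal usage sketch for YTPlaylist (not from the original source): it assumes a YouTube Data API v3 resource built with googleapiclient, and the API key and playlist ID below are placeholders.

# Hypothetical usage sketch, not part of the original source.
# Assumes a valid YouTube Data API v3 key; the key and playlist ID are placeholders.
from googleapiclient.discovery import build

yt_resource = build('youtube', 'v3', developerKey='YOUR_API_KEY')
playlist = YTPlaylist(yt_resource, 'PL_PLACEHOLDER_ID')
playlist.fetch_videos()                 # page through playlistItems into the DataStore
playlist.print_videos()                 # print the cached video titles
serials = playlist.get_videos_serial()  # {title: serial_number}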
def test_create_new_spine_database(self):
    """Test that a new Spine database is created when clicking on the Spine-icon tool button."""
    with mock.patch("data_store.QFileDialog") as mock_file_dialog:
        data_store = DataStore(self.toolbox, "DS", "", dict(), 0, 0)
        file_path = os.path.join(data_store.data_dir, "mock_db.sqlite")
        mock_file_dialog.getSaveFileName.return_value = [file_path]
        data_store.activate()
        self.toolbox.ui.toolButton_new_spine.click()
        self.assertTrue(os.path.isfile(file_path), "mock_db.sqlite file not found.")
        sqlite_file = self.toolbox.ui.lineEdit_SQLite_file.text()
        self.assertEqual(sqlite_file, file_path)
        database = self.toolbox.ui.lineEdit_database.text()
        basename = os.path.basename(file_path)
        self.assertEqual(database, basename)
def test_copy_db_url_to_clipboard(self):
    """Test that the database url from current selections is copied to the clipboard."""
    # First create a DS with an sqlite db reference
    file_path = os.path.join(self.toolbox.project().project_dir, "mock_db.sqlite")
    if not os.path.exists(file_path):
        with open(file_path, 'w'):
            pass
    url = "sqlite:///" + file_path
    create_new_spine_database(url)
    reference = dict(database="foo", username="******", url=url)
    data_store = DataStore(self.toolbox, "DS", "", reference, 0, 0)
    data_store.activate()
    self.toolbox.ui.toolButton_copy_db_url.click()
    clipboard_text = QApplication.clipboard().text()
    self.assertEqual(clipboard_text, url)
def test_caching_lc(self):
    with fitsio.FITS(self.fits_filename) as infile:
        lc = infile['flux'][0:1, :].ravel()
    store = DataStore.from_filename(self.fits_filename)
    store.get('flux', aperture=0)
    assert np.all(store._cache[('flux', 0)] == lc)
def test_caching_array(self):
    with fitsio.FITS(self.fits_filename) as infile:
        flux = infile['flux'].read()
    store = DataStore.from_filename(self.fits_filename)
    store.get('flux')
    assert np.all(store._cache[('flux', None)] == flux)
class YTChannel:

    def __init__(self, yt_resource: Resource, channel_id) -> None:
        """
        :param yt_resource: googleapiclient.discovery.Resource object
        :param channel_id: youtube channel ID
        """
        self._yt_resource = yt_resource
        self._channel_id = channel_id
        self._playlist_store = DataStore()

    @property
    def total_playlists(self):
        return self._playlist_store.len

    def fetch_playlists(self):
        with alive_bar(manual=True, bar='smooth', spinner='dots_reverse') as bar:
            nextPageToken = None
            page = 1
            while True:
                pl_request = self._yt_resource.playlists().list(
                    part='snippet',
                    channelId=self._channel_id,
                    pageToken=nextPageToken)
                pl_response = pl_request.execute()

                for item in pl_response['items']:
                    self._playlist_store.update(title=item['snippet']['title'], id=item['id'])

                bar(page / ceil(pl_response['pageInfo']['totalResults']
                                / pl_response['pageInfo']['resultsPerPage']))
                page += 1

                nextPageToken = pl_response.get('nextPageToken')
                if not nextPageToken:
                    break
        print()

    def print_playlists(self):
        print_heading('<< PLAYLISTS >>')
        self._playlist_store.print()

    def select_playlist(self, playlist_no):
        return self._playlist_store.list()[playlist_no - 1]
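A minimal end-to-end sketch (not from the original source) showing how YTChannel and YTPlaylist might be chained; the API key and channel ID are placeholders, and reading chosen['id'] assumes the DataStore entries keep the 'id' field stored in fetch_playlists above.

# Hypothetical usage sketch, not part of the original source.
# The API key and channel ID are placeholders; chosen['id'] assumes the DataStore
# entries expose the 'id' field set by fetch_playlists above.
from googleapiclient.discovery import build

yt_resource = build('youtube', 'v3', developerKey='YOUR_API_KEY')
channel = YTChannel(yt_resource, 'UC_PLACEHOLDER_ID')
channel.fetch_playlists()            # page through playlists().list into the DataStore
channel.print_playlists()
chosen = channel.select_playlist(1)  # first playlist from the printed listing
videos = YTPlaylist(yt_resource, chosen['id'])
videos.fetch_videos()
videos.print_videos()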
def __init__(self, parent, lineno):
    self.logger = Logger(self, Logger.INFO)
    self.logger.debug("constructor")
    tkinter.Frame.__init__(self, parent)
    self.data_store = DataStore.get_instance()
    self.index = lineno
    self.name = "Hole %d" % (lineno + 1)

    self.line_name = tkinter.Label(self, text=self.name, width=12)
    self.line_name.grid(row=lineno + 1, column=0, sticky=tkinter.W)

    self.inter_ctl = tkinter.Entry(self, width=5, validate="focusout",
                                   validatecommand=self.change_interval)
    self.inter_ctl.bind('<Return>', self.change_interval)
    self.inter_ctl.bind('<Tab>', self.change_interval)
    self.inter_ctl.grid(row=lineno + 1, column=1)

    self.note_ctl_txt = tkinter.StringVar()
    self.note_ctl = tkinter.Label(self, textvariable=self.note_ctl_txt, width=12)
    self.note_ctl.grid(row=lineno + 1, column=2)

    self.freq_ctl_txt = tkinter.StringVar()
    self.freq_ctl = tkinter.Label(self, textvariable=self.freq_ctl_txt, width=12)
    self.freq_ctl.grid(row=lineno + 1, column=3)

    self.hole_ctl = HoleSizeWidgit(self, lineno)
    self.hole_ctl.config(padx=25)
    self.hole_ctl.grid(row=lineno + 1, column=4)

    self.locat_ctl_txt = tkinter.StringVar()
    self.locat_ctl = tkinter.Label(self, textvariable=self.locat_ctl_txt, width=12)
    self.locat_ctl.grid(row=lineno + 1, column=5)

    self.diff_ctl_txt = tkinter.StringVar()
    self.diff_ctl = tkinter.Label(self, textvariable=self.diff_ctl_txt, width=12)
    self.diff_ctl.grid(row=lineno + 1, column=6)

    self.cutoff_ctl_txt = tkinter.StringVar()
    self.cutoff_ctl = tkinter.Label(self, textvariable=self.cutoff_ctl_txt, width=12)
    self.cutoff_ctl.grid(row=lineno + 1, column=7)

    self.set_state()
    self.logger.debug("end constructor")
def test_load_reference(self):
    """Test that reference is loaded into selections on Data Store creation,
    and then shown in the ui when Data Store is activated.
    """
    # FIXME: For now it only tests sqlite references
    file_path = os.path.join(self.toolbox.project().project_dir, "mock_db.sqlite")
    if not os.path.exists(file_path):
        with open(file_path, 'w'):
            pass
    url = "sqlite:///" + file_path
    create_new_spine_database(url)
    reference = dict(database="foo", username="bar", url=url)
    data_store = DataStore(self.toolbox, "DS", "", reference, 0, 0)
    data_store.activate()
    dialect = self.toolbox.ui.comboBox_dialect.currentText()
    database = self.toolbox.ui.lineEdit_database.text()
    username = self.toolbox.ui.lineEdit_username.text()
    self.assertEqual(dialect, 'sqlite')
    self.assertEqual(database, 'foo')
    self.assertEqual(username, 'bar')
def __init__(self, key, years):
    """Initialise Cassandra

    Args:
        key: String of TBA key.
        years: List of the years in which to cache results.
    """
    self.years = years
    self.key = key

    # Cache previous results
    events = {}
    matches = {}
    base_url = "https://www.thebluealliance.com/api/v3"
    header = {"X-TBA-Auth-Key": self.key}

    # Fetch events by year and order them chronologically
    for year in years:
        r = requests.get(base_url + "/events/" + str(year) + "/simple",
                         headers=header).json()
        # Sort by date and don't include offseason events
        a = sorted(r, key=lambda b: b["start_date"])
        a = [i["key"] for i in a if i["event_type"] < 99]
        events[str(year)] = a

    # Fetch matches by year and event
    for year in years:
        for event in events[str(year)]:
            r = requests.get(base_url + "/event/" + event + "/matches/simple",
                             headers=header).json()
            matches[event] = r

    # Save to cache
    store = DataStore(new_data_store=True, year_events=events)
    for year in years:
        for event in events[str(year)]:
            event_matches = matches[event]
            store.add_event_matches(str(year), event, event_matches)
def bootstrap_db(from_scratch):
    config = Configuration()
    conn = DataStore().get_connection()

    if from_scratch and config.get_db_name() in list(r.db_list().run(conn)):
        print('Forcing - dropping existing db')
        r.db_drop(config.get_db_name()).run(conn)

    if config.get_db_name() not in list(r.db_list().run(conn)):
        r.db_create(config.get_db_name()).run(conn)
    else:
        print(f"{config.get_db_name()} db already exists")

    if config.get_article_table() not in list(r.db(config.get_db_name()).table_list().run(conn)):
        r.db(config.get_db_name()).table_create(config.get_article_table()).run(conn)
        r.db(config.get_db_name()).table(config.get_article_table()).index_create('title').run(conn)
    else:
        print(f"{config.get_article_table()} table already exists")

    DataStore.disconnect(conn)
def mm_to_in(val, round=False):
    '''
    Simply convert the value given from mm to inches and round to the increment.
    '''
    logger.debug(sys._getframe().f_code.co_name)
    from data_store import DataStore as ds  # avoid a circular dependency
    data = ds.get_instance()
    if round:
        return rnd(val / 25.4, data.internal_data['hole_in_inc'])
    else:
        return val / 25.4
def start():
    setup_logging()
    logging.getLogger('pyro').info("started")
    db = DataStore(setup=True)
    db.apply_active_profile()
    settings = db.get_settings()
    settings['enabled'] = 0
    db.save_settings(settings)
    settings = db.get_settings()
    logging.getLogger('pyro').debug('starting with settings: %s' % settings)
    control = Control()
    control.start()
    print('starting web')
    web.start()
def do_train(args):
    # Load configuration
    config = ConfigParser()
    config.read_file(args.config)
    data = DataStore(config)

    # Create the CRF model.
    model = CRF(config)
    retrain_epochs = config["training"].getint("retrain_every")

    accuracy = []
    with EditShell(config) as shell:
        while data.has_next():
            conll = data.next()
            i = data.i()

            # If the data doesn't have tags, try to smart-tag them.
            if len(conll[0]) == DataStore.TAG_LABEL + 1:
                tags = [tok[DataStore.TAG_LABEL] for tok in conll]
            else:
                tags = model.infer(conll)

            try:
                # conll_display = ["{}/{}".format(token[0], token[2]) for token in conll]
                conll_display = ["{}".format(token[0]) for token in conll]
                # Pass a copy of the tag list to the shell
                action = shell.run(conll_display, list(tags),
                                   metadata=render_progress(data, accuracy))

                if action.type == ":prev":
                    try:
                        data.rewind(2)  # move 2 indices back
                    except AttributeError:
                        data.rewind(1)
                elif action.type == ":goto":
                    doc_idx, = action.args
                    assert doc_idx >= 0
                    data.goto(doc_idx)
                elif action.type == "save":
                    _, tags_ = action.args
                    accuracy.append(score(tags, tags_))
                    data.update(conll, tags_)
                    if i % retrain_epochs == 0:
                        model.retrain()
            except QuitException:
                break
def settings():
    settings = request.get_json()
    db = DataStore()
    print('Saving new settings: %s' % settings)
    db.save_settings(settings)
    return "ok"
def temps(idx=0):
    db = DataStore()
    temps = db.get_temps(idx)
    db.shutdown()
    return json.dumps(temps)
def test_get_and_bin(self):
    store = DataStore.from_filename(self.fits_filename)
    with mock.patch.object(store, 'get') as mock_get:
        mock_get.return_value = np.array([1, 1, 2, 2])
        value = store.get_and_bin('not used', npts=2, aperture=0)
        assert np.all(value[0] == np.array([1, 2]))
def test_bin_2d(self):
    arr = np.array([[1, 1, 1, 1, 1, 1, 1],
                    [2, 2, 2, 2, 2, 2, 2]])
    assert np.all(DataStore.bin_2d(arr, 2) == np.array([[1, 1, 1],
                                                        [2, 2, 2]]))
def test_bin_1d(self):
    arr = np.array([1, 1, 2, 2])
    assert np.all(DataStore.bin_1d(arr, 2)[0] == np.array([1, 2]))
def profiles():
    db = DataStore()
    if request.method == 'POST':
        _profiles = request.get_json()
        for _profile in _profiles:
            if 'id' in _profile and _profile['id'] != '':
                logging.getLogger('pyro').debug('updating: %s' % _profile)
                db.save_profile(_profile)
            else:
                logging.getLogger('pyro').debug('adding: %s' % _profile)
                db.add_profile(_profile)
        db.shutdown()
        return 'ok'
    elif request.method == 'DELETE':
        _profile = request.get_json()
        logging.getLogger('pyro').debug('deleting: %s' % _profile)
        db.delete_profile(_profile['id'])
        db.shutdown()
        return 'ok'
    else:
        _profiles = db.get_profiles()
        db.shutdown()
        return json.dumps(_profiles)
def enabled():
    _settings = request.get_json()
    db = DataStore()
    logging.getLogger('pyro').debug('Setting enabled: %s' % _settings)
    db.set_enabled(_settings['enabled'])
    return 'ok'
def test_get_timeseries(self):
    store = DataStore.from_filename(self.fits_filename)
    assert len(store.get('flux', aperture=0).shape) == 1
def test_get_array(self):
    store = DataStore.from_filename(self.fits_filename)
    assert len(store.get('flux').shape) == 2