Example #1
import os

from datastore import DataStore  # assumed local module, as in Example #2
from htmlparser import HtmlParser  # assumed local module providing HtmlParser


# Incremental job-site crawler: fetch the industry categories, build paginated
# listing URLs, then crawl per-company pages, checkpointing progress to JSON.
class MySpider(object):
    def __init__(self, root_url):
        self.parser = HtmlParser()
        self.storage = DataStore()
        self._get_root_urls(root_url)

    def _get_root_urls(self, root_url):
        # Fetch the industry-category URLs unless they are already cached on disk.
        if not os.path.exists('job_class.json'):
            new_urls = self.parser.get_url(root_url)
            self.storage.local_store(new_urls, 'job_class.json')  # store the industry-category URLs to crawl

    def joburl_init(self, pagenum, path='job_class.json'):
        root_urls = self.storage.load_data(path)
        jobs_dict = {}
        for i in pagenum:  # pagenum is an iterable of page numbers
            for job_class in root_urls:
                # build the paginated listing URLs to crawl
                jobs_dict[job_class + str(i)] = root_urls[job_class] + str(i)
        self.storage.local_store(jobs_dict, 'job_page_url.json')  # store the constructed listing URLs

    def company_url(self, path='job_page_url.json'):
        listing_urls = self.storage.load_data(path)
        company_dicts = {}
        url_get = 0  # number of listing URLs already fetched
        for page_key in listing_urls:
            print("Industry listing URLs left to crawl:", len(listing_urls) - url_get)
            url_get += 1
            url = listing_urls[page_key]
            company_dicts.update(self.parser.getcompany_url(url))
            self.storage.local_store(url, 'job_page_url_old.json')  # store URLs already crawled
        self.storage.local_store(company_dicts, 'company_info_url_new.json')  # store the company-info URLs

    def company_info(self, path='company_info_url_new.json'):
        company_info_urls = self.storage.load_data(path)
        url_get = 0  # number of company-info URLs already fetched
        for company_name in company_info_urls:
            print("Company-info URLs left to crawl:", len(company_info_urls) - url_get)
            url_get += 1
            url = company_info_urls[company_name]
            self.parser.getcompany_info(company_name, url)
            self.storage.local_store(url, 'compang_info_url_old.json')  # store company-info URLs already crawled

    # Resume fetching company info from the last checkpoint.
    def grab_increment(self):
        new_urls = self.storage.load_data('company_info_url_new.json')
        old_urls = self.storage.load_data('compang_info_url_old.json')
        for company_name in new_urls:
            new_url = new_urls[company_name]
            if new_url not in old_urls:
                self.parser.getcompany_info(company_name, new_url)
                self.storage.local_store(new_url, 'compang_info_url_old.json')  # store company-info URLs already crawled
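
The class above leans on HtmlParser and DataStore helpers that are not shown here. As a rough usage sketch, assuming joburl_init takes an iterable of page numbers and each stage reads the JSON file written by the one before it (the root URL below is a placeholder):

# Hypothetical driver for MySpider; the root URL and page range are placeholders.
spider = MySpider('https://example.com/job_classes')  # caches job_class.json on the first run
spider.joburl_init(range(1, 6))  # build listing URLs for pages 1-5 -> job_page_url.json
spider.company_url()             # collect company-info URLs -> company_info_url_new.json
spider.company_info()            # crawl each company page, checkpointing progress to disk
spider.grab_increment()          # on a rerun, crawl only URLs not crawled before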
Example #2
import pickle
import tensorflow as tf
import numpy as np
from datastore import DataStore
from model_def import CombinedModel
from tensorflow.keras.optimizers import Adam

# Set MODE to "OLD" to read the pickled DataStore object instead of rebuilding it.
MODE = "OLD"

if MODE == "NEW":
    datastore = DataStore()
    # load the raw interactions file, then build the dataset arrays
    datastore.load_data("agent_interactions.jsl")
    datastore.build()
    with open('dataset_pick.pkl', 'wb') as o:
        pickle.dump(datastore, o, pickle.HIGHEST_PROTOCOL)

else:
    with open("dataset_pick.pkl", "rb") as p:
        datastore = pickle.load(p)

print(datastore.actions.shape)
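
# Note: DataStore here comes from the local datastore module (not shown); it is
# assumed to expose load_data() for reading the interactions file, build() for
# turning the records into arrays, and an `actions` ndarray checked above.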

combined_model = CombinedModel(16, 8, 4)
optimizer = Adam(learning_rate=0.00001)


@tf.function
def train_step(train_states, prev_states, current_states, actions):
    with tf.GradientTape() as tape: