def setup():
    ep_version = eprinttools.version()
    ds_version = dataset.version()
    eprint_url = os.getenv("EPRINT_URL")
    auth_type = os.getenv("EPRINT_AUTH_TYPE")
    username = os.getenv("EPRINT_USERNAME")
    secret = os.getenv("EPRINT_PASSWORD")
    collection_name = "test_get_eprint_xml.ds"
    if eprint_url is None or eprint_url == "":
        print(f"Skipping tests for eprinttools {ep_version}, EPRINT_URL not set in the environment")
        sys.exit(1)
    if not os.path.exists(collection_name):
        ok = dataset.init(collection_name)
        if not ok:
            print(f"Could not init {collection_name}")
            sys.exit(1)
    if auth_type is None:
        auth_type = ""
    if username is None:
        username = ""
    if secret is None:
        secret = ""
    return eprint_url, auth_type, username, secret, collection_name
def test_get_eprint_xml(t, eprint_url, auth_type, username, secret, collection_name):
    if os.path.exists(collection_name):
        shutil.rmtree(collection_name)
    ok = dataset.init(collection_name)
    if not ok:
        t.error(f"Can't initialize {collection_name}")
        return
    t.verbose_off()  # switch to t.verbose_on() when debugging
    test_name = t.test_name()
    cfg = eprinttools.cfg(eprint_url, auth_type, username, secret, collection_name)
    keys = eprinttools.get_keys(cfg)
    if len(keys) == 0:
        t.error(f"Can't test {test_name} without keys, got zero keys")
        return
    collection_keys = []
    check_keys = []
    # Draw a random sample of up to 50 unique keys to check.
    for i in range(100):
        key = random.choice(keys)
        if key not in check_keys:
            check_keys.append(key)
        if len(check_keys) > 50:
            break
    t.print(f"Calculating the keys in sample that will get stored in the collection {collection_name}")
    for key in check_keys:
        # Retrieve the EPrint XML for the record, then verify a record for it landed in the dataset collection.
        ok = eprinttools.get_eprint_xml(cfg, key)
        e_msg = eprinttools.error_message()
        if not ok or e_msg != "":
            if not e_msg.startswith("401"):
                t.error(f"Expected data for {key}, got {ok}, {e_msg}")
            else:
                t.print(f"found {key}, requires authentication")
        else:
            t.print(f"found {key} with data, checking dataset for record")
            data = dataset.read(collection_name, key)
            e_msg = dataset.error_message()
            if len(data) == 0:
                t.error(f"{key} in {collection_name} empty record, {e_msg}")
            if e_msg != "":
                t.error(f"{key} in {collection_name} error, {e_msg}")
# init() is not defined in this excerpt; it is assumed to live in utils
# alongside shuffle_split and to return the feature matrix and labels.
from utils import shuffle_split, init
from random import randint, shuffle
from math import floor
from time import time

from GaussianNB import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

features_list = init()
split_ratio = 0.7

# training_set, testing_set = split_dataset(features_list, 0.7)
# training_labels, testing_labels = split_dataset(labels, 0.7)
# (the split below calls init() again rather than reusing features_list)
training_set, testing_set, training_labels, testing_labels = shuffle_split(
    *init(), split_ratio, shuffle=True)

# scaling
sc = StandardScaler()
sc.fit(training_set)
training_set = sc.transform(training_set)
testing_set = sc.transform(testing_set)

# naive bayes
gnb = GaussianNB()
bnb = BernoulliNB()
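# The excerpt stops right after instantiating the two Naive Bayes models.
# Below is a minimal, illustrative sketch (not part of the original script)
# of how the scaled splits might be used to fit and compare the imported
# classifiers; it assumes the local GaussianNB follows the sklearn
# fit/predict interface and uses accuracy as an example metric.
from sklearn.metrics import accuracy_score

models = {
    "GaussianNB": gnb,
    "BernoulliNB": bnb,
    "SVC": SVC(),
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "RandomForest": RandomForestClassifier(),
    "DecisionTree": DecisionTreeClassifier(),
    "KNeighbors": KNeighborsClassifier(),
}
for name, model in models.items():
    start = time()
    model.fit(training_set, training_labels)
    predictions = model.predict(testing_set)
    print(f"{name}: accuracy={accuracy_score(testing_labels, predictions):.3f}"
          f" ({time() - start:.2f}s)")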
import sys
import os
import json

import dataset


def stop(msg):
    print(msg)
    sys.exit(1)

#
# create a collection with init
#
err = dataset.init("friends.ds")
if err != '':
    stop(err)
err = dataset.init("favorites.ds")
if err != '':
    stop(err)
err = dataset.init("characters.ds")
if err != '':
    stop(err)

#
# create, read, update and delete
#

## create
err = dataset.create("friends.ds", "frieda", {
def get_wos_refs(new=True):
    # new=True downloads everything from scratch and deletes any existing records
    collection = 'wos_refs.ds'
    if new:
        if os.path.exists(collection):
            shutil.rmtree(collection)
    if not os.path.isdir(collection):
        ok = dataset.init(collection)
        if not ok:
            print("Dataset failed to init collection")
            exit()
    # Run query to get scope of records
    token = os.environ['WOSTOK']
    headers = {'X-ApiKey': token, 'Content-type': 'application/json'}
    base_url = 'https://api.clarivate.com/api/wos/?databaseId=WOK'
    collected = dataset.has_key(collection, "captured")
    if collected:
        # Only pull records loaded since the last capture date.
        date = dataset.read(collection, "captured")
        date = date[0]['captured']
        date = datetime.fromisoformat(date)
        current = datetime.today()
        diff = (current - date)
        base_url = base_url + '&loadTimeSpan=' + str(diff.days) + 'D'
    url = base_url + '&count=1&firstRecord=1&usrQuery=OG=California%20Institute%20of%20Technology'
    incomplete = dataset.has_key(collection, "incomplete")
    if incomplete:
        # Resume a previously interrupted harvest.
        query = dataset.read(collection, "incomplete")
        query_id = query[0]['incomplete']
        query = dataset.read(collection, "record_start")
        record_start = query[0]['record_start']
        query = dataset.read(collection, "record_count")
        record_count = query[0]['record_count']
    else:
        response = requests.get(url, headers=headers)
        response = response.json()
        record_count = response['QueryResult']['RecordsFound']
        print(record_count)
        query_id = response['QueryResult']['QueryID']
        dataset.create(collection, 'incomplete', {"incomplete": query_id})
        record_start = 1
        dataset.create(collection, 'record_start', {"record_start": record_start})
        dataset.create(collection, 'record_count', {"record_count": record_count})
    query_url = 'https://api.clarivate.com/api/wos/query/'
    while record_count > 0:
        print(record_start)
        if record_count > 100:
            url = query_url + str(query_id) + '?count=100&firstRecord=' +\
                str(record_start)
            response = requests.get(url, headers=headers)
            response = response.json()
            print(response)
            save_records(collection, response['Records']['records']['REC'])
            record_start = record_start + 100
            record_count = record_count - 100
            dataset.update(collection, 'record_start', {"record_start": record_start})
            dataset.update(collection, 'record_count', {"record_count": record_count})
        else:
            url = query_url + str(query_id) + '?count=' +\
                str(record_count) + '&firstRecord=' + str(record_start)
            response = requests.get(url, headers=headers)
            response = response.json()
            save_records(collection, response['Records']['records']['REC'])
            record_count = 0
    # Record the capture date so the next run can harvest incrementally.
    date = datetime.today().isoformat()
    record = {"captured": date}
    if dataset.has_key(collection, "captured"):
        err = dataset.update(collection, 'captured', record)
        if err != "":
            print(f"Unexpected error on update: {err}")
    else:
        err = dataset.create(collection, 'captured', record)
        if err != "":
            print(f"Unexpected error on create: {err}")
    dataset.delete(collection, 'incomplete')
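# save_records() is called above but not included in this excerpt. Below is a
# hypothetical sketch of what it might do, assuming each Web of Science record
# is a dict carrying a 'UID' key and that dataset.create()/update() return an
# error string (the convention used for the 'captured' record above).
def save_records(collection, records):
    for rec in records:
        key = rec['UID']
        if dataset.has_key(collection, key):
            err = dataset.update(collection, key, rec)
        else:
            err = dataset.create(collection, key, rec)
        if err != "":
            print(f"Problem saving {key}: {err}")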
def reset(self):
    dataset.init()
# from admin import Admin
# from teacher import Teacher
# from student import Student
from account import Account
import dataset


def login():
    username = input("Enter username: ")
    password = input("Enter password: ")
    return dataset.verify(username, password)


if __name__ == "__main__":
    dataset.init()
    command = ""
    account = login()
    while account is None:
        account = login()
    print("Login successful")
    while command != "quit" and command != "exit":
        command = input(">>")
        if command == 'help':
            account.help()
        print("command: " + command)
        account.do_command(command)