def main():
    # new_i2s = hp.load('new_i2s.dic')
    # filename_i2s = traverse(new_i2s)
    filename_i2s = hp.load('filename_i2s.dic')
    # hp.save(filename_i2s, 'filename_i2s.dic')
    # print filename_i2s
    # print len(filename_i2s)
    # segments('TRAAAAW128F429D538.h5')
    print '--- speed collection ---'
    '''
    (Time_Sec, Section_Number, Average_BPM) = speed_allocate(filename_i2s)
    hp.save(Time_Sec, 'Time_Sec.dic')
    hp.save(Section_Number, 'Section_Number.dic')
    hp.save(Average_BPM, 'Average_BPM.dic')
    '''
    TS = hp.load('Time_Sec.dic')
    SN = hp.load('Section_Number.dic')
    ABPM = hp.load('Average_BPM.dic')
    plot_distribution(TS)
    plot_distribution(SN)
    plot_distribution(ABPM)
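# Note: main() above relies on hp.load(...) / hp.save(...) for '.dic' files,
# but the 'hp' module itself is not shown. Below is only a hedged sketch of
# what such pickle-backed helpers might look like; the real implementation
# (file format, protocol) may differ.
import pickle

def load(filename):
    # Deserialize a previously saved object from disk.
    with open(filename, 'rb') as f:
        return pickle.load(f)

def save(obj, filename):
    # Serialize an object to disk so later runs can skip recomputation.
    with open(filename, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)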
def __init__(self, user_Id, impersonation=False):
    super(user, self).__init__()
    self.user_Id = user_Id
    self.impersonation = impersonation
    if self.impersonation:
        self.file_prefs = DIR_SETTINGS + 'gDrivePro.json'
        prefs = load(self.file_prefs)
        # client_id = prefs.get('client_id', None) or raw_input('Please enter the id for your Google API client: ')
        # client_secret = prefs.get('client_secret', None) or raw_input('Please enter the secret for your Google API client: ')
        # scope = ["https://www.googleapis.com/auth/drive"]
        tokens = prefs.get('tokens')
        data = load(DIR_SETTINGS + 'oauth2service.json')
        private_key = data.get('private_key')
        client_email = data.get('client_email')
        scope = ['https://www.googleapis.com/auth/drive']
        self.auth = auth.ServiceAuthenticator(client_email, private_key, scope, tokens)
    else:
        pass
    self.file = files(self)
    self.proc = processor(self)
def __init__(self, num_agents, path='churn_modelling.csv'):
    self.path = path
    self.dataset = Dataset(path=self.path)
    self.X, self.y = self.dataset.preprocess()
    self.X_train, self.X_test, self.y_train, self.y_test = self.dataset.split(
        self.X, self.y, fraction=0.2)
    self.X_train, self.X_test = self.dataset.scale(self.X_train, self.X_test)
    self.train_loader, self.test_loader = H.load(self.X_train, self.X_test,
                                                 self.y_train, self.y_test)
    self.input_dims = [self.X.shape[1]]
    self.output_dims = len(np.unique(self.y))
    print("Dims of X_train is {}".format(H.get_dimensions(data=self.X_train)))
    print("Dims of y_train is {}".format(H.get_dimensions(data=self.y_train)))
    print("Input dims is {}, output dims is {}".format(self.input_dims, self.output_dims))
    self.num_agents = num_agents
    self.func = 'relu'
    self.agent_states = [[] for i in range(self.num_agents)]
    self.activations = [self.func] * self.num_agents
    # self.agent_states = nodes
    print("env initialised here ...")
    self.model = [
        Model(0.01, self.input_dims, self.output_dims, self.agent_states[i],
              self.activations, self.train_loader, self.test_loader)
        for i in range(self.num_agents)
    ]
    for i in range(self.num_agents):
        self.model[i] = self.model[i].to(device)
def get_partition_from_job(self, job):
    if job.uuid in self.partition_cache:
        return self.partition_cache[job.uuid]
    try:
        client = zerorpc.Client()
        client.connect(job.address)
        obj_str = client.take(job.uuid)
        if obj_str is None:
            raise Exception("get_partition_from_job: Can't be None.")
    except zerorpc.RemoteError as e:
        if e.name == JobTaken.__name__:
            print warn('Remote job is taken. Skip.')
        elif e.name == JobFinished.__name__:
            print warn('Remote job is finished. Skip.')
        else:
            print error('Remote error at getting partition. Skip.')
        return None
    except zerorpc.LostRemote:
        print error('Lost remote at getting partition. Skip.')
        return None
    else:
        logger.info('take job:' + job.address)
        partition = load(obj_str)
        self.partition_cache[job.uuid] = partition
        return partition
def __init__(self):
    super(fileHandler, self).__init__()
    # define filenames
    self.file_files = 'files.json'
    self.file_folders = 'folders.json'
    self.file_copyFiles = 'copyFiles.json'
    self.file_newFiles = 'newFiles.json'
    self.file_newFolders = 'newFolders.json'
    # initialize variables
    self.files = load(self.file_files)
    self.folders = load(self.file_folders)
    self.copyFiles = load(self.file_copyFiles)
    self.newFiles = load(self.file_newFiles)
    self.newFolders = load(self.file_newFolders)
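# Note: the fileHandler above calls a bare load('...json') helper that is not
# shown here. A minimal sketch of such a JSON-backed loader follows; the
# default value and error handling are assumptions, not the actual helper API.
import json
import os

def load(filename, default=None):
    # Return the parsed JSON contents of filename, or a default on first run
    # when the state file does not exist yet.
    if not os.path.exists(filename):
        return default if default is not None else {}
    with open(filename, 'r') as f:
        return json.load(f)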
def insert_into_database():
    base_dir = "/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/translation_start/"
    organisms = os.listdir(base_dir)
    data = defaultdict(dict)

    for org_name in organisms:
        work_dir = base_dir + org_name + "/"
        save_fn = work_dir + "seqs.pickle"
        data_raw = helper.load(save_fn)
        data_raw["neg"] = [s for s in data_raw["neg"] if len(s) != 0][0:6000]
        data_raw["pos"] = [s for s in data_raw["pos"] if len(s) != 0][0:60]
        labels = [-1] * len(data_raw["neg"]) + [1] * len(data_raw["pos"])
        examples = [e.upper() for e in (data_raw["neg"] + data_raw["pos"])]
        data[org_name]["LT"] = labels
        data[org_name]["XT"] = examples

    import data_processing
    data_processing.prepare_multi_datasets(data, 0.35, num_splits=7,
                                           description="start_codon tiny",
                                           feature_type="string",
                                           write_db=True, random=True)
def load_splice_data():
    """ load splice-site data """
    base_dir = "/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/translation_start/"
    organisms = os.listdir(base_dir)
    organisms = ["d_melanogaster", "m_musculus", "h_sapiens", "b_taurus"]

    task_sim = np.ones((4, 4)) + np.eye(4)

    dat = defaultdict(dict)
    num_xt = 0

    for org_name in organisms:
        print "processing", org_name
        work_dir = base_dir + org_name + "/"
        save_fn = work_dir + "seqs_acc.pickle"
        result = helper.load(save_fn)

        neg = result["neg"]  # [0:10000]
        pos = result["pos"]  # [0:10000]
        assert type(neg) == type(pos) == list

        dat[org_name]["xt"] = neg + pos
        dat[org_name]["lt"] = [-1.0] * len(neg) + [1.0] * len(pos)
        num_xt += len(neg) + len(pos)

    print "num_xt", num_xt
    return dat, task_sim
def perform_orange_clustering(mss_id):
    import orange
    from task_similarities import TreeNode
    import helper

    # (dist_full, id_to_name) = compute_task_distances(mss_id)
    p = '/fml/ag-raetsch/home/cwidmer'
    (dist_full, id_to_name) = helper.load(p + "/dist")

    # build the lower-triangular distance matrix expected by orange
    l = []
    for i in range(len(dist_full)):
        l.append([])
        for j in range(i + 1, len(dist_full)):
            l[i].append(dist_full[i, j])
    l.reverse()

    m = orange.SymMatrix(l)
    root = orange.HierarchicalClustering(
        m, linkage=orange.HierarchicalClustering.Average)

    root_node = TreeNode("root")
    clusters = [root]
    nodes = [root_node]

    while len(clusters) > 0:
        cluster = clusters.pop(0)
        node = nodes.pop(0)

        # append nodes if non-empty
        if cluster.left:
            clusters.append(cluster.left)
            name = str(tuple(cluster.left))
            if len(tuple(cluster.left)) == 1:
                name = id_to_name[tuple(cluster.left)[0]]
            print name
            # create nodes
            left_node = TreeNode(name)
            node.add_child(left_node, 1.0)
            nodes.append(left_node)

        # append nodes if non-empty
        if cluster.right:
            clusters.append(cluster.right)
            name = str(tuple(cluster.right))
            if len(tuple(cluster.right)) == 1:
                name = id_to_name[tuple(cluster.right)[0]]
            print name
            # create nodes
            right_node = TreeNode(name)
            node.add_child(right_node, 1.0)
            nodes.append(right_node)

    return root_node
def handle(self, input):
    result = self.pattern.match(input)
    self.script_name = result.group(1)
    self.regex = result.group(2)
    file_contents = helper.load(self.script_name)
    self.result = helper.extract(file_contents, self.regex)
def _dispatch(cmd, dataset, dataset_param, additional, **kwargs):
    """Dispatch command to be run.
    """
    graph = helper.load(ria.one_graph(), dataset, dataset_param)
    for item in additional:
        with open(item) as fp:
            dataset_io.load(graph, fp)
    logging.info("Start analyzing.")
    cmd(graph=graph, **kwargs)
def run(method, method_param, dataset, dataset_param, **kwargs):
    """Prepare a review graph, load a dataset to it, and execute analyze.

    Args:
      method: name of the method to be run.
      method_param: list of strings representing key-value pairs.
      dataset: name of the dataset to be loaded.
      dataset_param: list of strings representing key-value pairs.
    """
    graph = helper.graph(method, method_param)
    analyze(helper.load(graph, dataset, dataset_param), **kwargs)
def __init__(self, board):
    self.board = board
    self.value_pieces = {
        chess.PAWN: 100,
        chess.KNIGHT: 320,
        chess.BISHOP: 330,
        chess.ROOK: 500,
        chess.QUEEN: 900,
        chess.KING: 20000
    }
    self.model = load()
    # piece-square tables; for black, index with movility_bonus[piece][63 - square]
    self.movility_bonus = {
        chess.PAWN: np.array((
            0, 0, 0, 0, 0, 0, 0, 0,
            5, 10, 10, -20, -20, 10, 10, 5,
            5, -5, -10, 0, 0, -10, -5, 5,
            0, 0, 0, 20, 20, 0, 0, 0,
            5, 5, 10, 25, 25, 10, 5, 5,
            10, 10, 20, 30, 30, 20, 10, 10,
            50, 50, 50, 50, 50, 50, 50, 50,
            0, 0, 0, 0, 0, 0, 0, 0)),
        chess.KNIGHT: np.array((
            -50, -40, -30, -30, -30, -30, -40, -50,
            -40, -20, 0, 5, 5, 0, -20, -40,
            -30, 5, 10, 15, 15, 10, 5, -30,
            -30, 0, 15, 20, 20, 15, 0, -30,
            -30, 5, 15, 20, 20, 15, 5, -30,
            -30, 0, 10, 15, 15, 10, 0, -30,
            -40, -20, 0, 0, 0, 0, -20, -40,
            -50, -40, -30, -30, -30, -30, -40, -50)),
        chess.BISHOP: np.array((
            -20, -10, -10, -10, -10, -10, -10, -20,
            -10, 5, 0, 0, 0, 0, 5, -10,
            -10, 10, 10, 10, 10, 10, 10, -10,
            -10, 0, 10, 10, 10, 10, 0, -10,
            -10, 5, 5, 10, 10, 5, 5, -10,
            -10, 0, 5, 10, 10, 5, 0, -10,
            -10, 0, 0, 0, 0, 0, 0, -10,
            -20, -10, -10, -10, -10, -10, -10, -20)),
        chess.ROOK: np.array((
            0, 0, 0, 5, 5, 0, 0, 0,
            -5, 0, 0, 0, 0, 0, 0, -5,
            -5, 0, 0, 0, 0, 0, 0, -5,
            -5, 0, 0, 0, 0, 0, 0, -5,
            -5, 0, 0, 0, 0, 0, 0, -5,
            -5, 0, 0, 0, 0, 0, 0, -5,
            5, 10, 10, 10, 10, 10, 10, 5,
            0, 0, 0, 0, 0, 0, 0, 0)),
        chess.QUEEN: np.array((
            -20, -10, -10, -5, -5, -10, -10, -20,
            -10, 0, 5, 0, 0, 0, 0, -10,
            -10, 5, 5, 5, 5, 5, 0, -10,
            0, 0, 5, 5, 5, 5, 0, -5,
            -5, 0, 5, 5, 5, 5, 0, -5,
            -10, 0, 5, 5, 5, 5, 0, -10,
            -10, 0, 0, 0, 0, 0, 0, -10,
            -20, -10, -10, -5, -5, -10, -10, -20)),
        chess.KING: np.array((
            20, 30, 10, 0, 0, 10, 30, 20,
            20, 20, 0, 0, 0, 0, 20, 20,
            -10, -20, -20, -20, -20, -20, -20, -10,
            -20, -30, -30, -40, -40, -30, -30, -20,
            -30, -40, -40, -50, -50, -40, -40, -30,
            -30, -40, -40, -50, -50, -40, -40, -30,
            -30, -40, -40, -50, -50, -40, -40, -30,
            -30, -40, -40, -50, -50, -40, -40, -30))
    }
    self.transp_table = {}
def __init__(self, path='churn_modelling.csv'):
    self.path = path
    self.dataset = Dataset(path=self.path)
    self.X, self.y = self.dataset.preprocess()
    self.X_train, self.X_test, self.y_train, self.y_test = self.dataset.split(
        self.X, self.y, fraction=0.2)
    self.X_train, self.X_test = self.dataset.scale(self.X_train, self.X_test)
    self.train_loader, self.test_loader = H.load(self.X_train, self.X_test,
                                                 self.y_train, self.y_test)
    self.input_dims = [self.X.shape[1]]
    self.output_dims = len(np.unique(self.y))
    print("Dims of X_train is {}".format(H.get_dimensions(data=self.X_train)))
    print("Dims of y_train is {}".format(H.get_dimensions(data=self.y_train)))
    print("Input dims is {}, output dims is {}".format(self.input_dims, self.output_dims))
    self.model1 = TestModel(self.input_dims, self.output_dims, 0.005, 3, 3,
                            self.train_loader, self.test_loader)
    self.model2 = TestModel(self.input_dims, self.output_dims, 0.005, 3, 3,
                            self.train_loader, self.test_loader)
def get_partition(self, uuid):
    if uuid in self.cache:
        return self.cache[uuid]
    if uuid in self.results:
        # create a new list to prevent iteration error
        for result in list(self.results[uuid]):
            try:
                c = zerorpc.Client()
                c.connect(result.address)
                partition_result = load(c.fetch_partition(uuid))
                self.cache[uuid] = partition_result
                return partition_result
            except (zerorpc.RemoteError, zerorpc.LostRemote):
                # the original `except zerorpc.RemoteError, zerorpc.LostRemote:`
                # bound the exception to zerorpc.LostRemote instead of catching
                # both exception types; the tuple form catches both
                continue
def create_normalizer_from_taxonomy(taxonomy):
    """
    creates kernel normalizer with similarities set from hop-distance
    according to taxonomy
    """
    # TODO fix --> num tasks can be computed from leaves etc...
    # fetch taxonomy
    # taxonomy = param.taxonomy.data

    print "WARNING; HARDCODED DISTANCE MATRIX IN HERE"
    hardcoded_distances = helper.load("/fml/ag-raetsch/home/cwidmer/svn/projects/alt_splice_code/src/task_sim_tis.bz2")

    # set normalizer
    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    # compute distances
    distances = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))
    similarities = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

    for (i, task_name_lhs) in enumerate(data.get_task_names()):
        for (j, task_name_rhs) in enumerate(data.get_task_names()):
            distances[i, j] = task_similarities.compute_hop_distance(
                taxonomy, task_name_lhs, task_name_rhs)

    # normalize distances
    distances = distances / numpy.max(distances)

    # set similarity
    for (i, task_name_lhs) in enumerate(data.get_task_names()):
        for (j, task_name_rhs) in enumerate(data.get_task_names()):
            similarity = param.base_similarity - distances[i, j]
            normalizer.set_task_similarity(i, j, similarity)
            # save for later
            similarities[i, j] = similarity

    return normalizer
def speed_allocate(inventory):
    tag = hp.load('my_tag.dic')
    Time_Sec = {}
    Section_Number = {}
    Average_BPM = {}

    for filename in inventory.keys():
        div = filename.split('/')
        ID = div[len(div) - 1][:-3]
        genr = tag[ID]
        # print genr
        (total_time_sec, section_number, average_bpm) = hp.speed_feature(filename)
        if genr not in Time_Sec:
            Time_Sec[genr] = []
            Section_Number[genr] = []
            Average_BPM[genr] = []
        Time_Sec[genr].append(total_time_sec)
        Section_Number[genr].append(section_number)
        Average_BPM[genr].append(average_bpm)

    return (Time_Sec, Section_Number, Average_BPM)
def getDecoded(path):
    """
    Loads path, gets the map, returns it.
    """
    global realmap
    if path not in realmap:
        lastLine = helper.load(path).rstrip().split("\n")[-1]
        mapPattern = re.compile(r'%\[((?:\d+:\d+,)*(?:\d+:\d+))\]')
        match = mapPattern.match(lastLine)
        if not match:
            logging.error("Input file not properly formatted.")
            logging.error("\tPlease run it through the preprocessor again.")
            logging.error("This means that the linenumbers will point to")
            logging.error("the preprocessed file rather than the unprocessed one.")
            realmap[path] = "not found"
        else:
            realmap[path] = match.group(1).split(",")
    return realmap[path]
def plot_file(data_name):
    import pylab

    # prefix = '/fml/ag-raetsch/home/cwidmer/svn/projects/mtl_dcd_submission/results/'
    prefix = '/fml/ag-raetsch/home/cwidmer/svn/projects/2012/mtl_dcd/results/'

    # fn_dcd = prefix + "result_" + data_name + "_dcd_shogun.pickle"
    # fn_mtk = prefix + "result_" + data_name + "_mtk_shogun.pickle"
    # fn_dcd = prefix + "result_newkids_" + data_name + "_dcd_shogun.pickle"
    # fn_mtk = prefix + "result_newkids_" + data_name + "_mtk_shogun.pickle"
    fn_dcd = prefix + "result_newkids_nitro_" + data_name + "_dcd_shogun.pickle"
    fn_mtk = prefix + "result_newkids_nitro_" + data_name + "_mtk_shogun.pickle"

    solvers = {"proposed DCD": fn_dcd, "baseline MTK": fn_mtk}
    colors = {"proposed DCD": "blue", "baseline MTK": "red"}

    for solver_name, fn in solvers.items():
        dat = helper.load(fn)
        tt = np.array(dat["time"], dtype=np.float64) / 1000.0 + 1.0
        rd = dat["fun_diff"]
        pylab.plot(tt, rd, "-o", label=solver_name, linewidth=0.5,
                   alpha=0.6, color=colors[solver_name])

    pylab.yscale("log")
    pylab.xscale("log")
    pylab.xlabel("time (s)")
    pylab.ylabel("function difference")  # TODO relative!
    pylab.grid(True)
    pylab.legend(loc="upper right")
    pylab.show()
def test_iteration(self):
    """Test analyze runs at most given number of iteration.
    """
    with tempfile.NamedTemporaryFile() as fp:
        store_dataset(fp)
        fp.flush()

        graph = helper.load(helper.graph("ria", ["alpha=2"]),
                            "file", ["file={0}".format(fp.name)])

        buf = StringIO()
        analyze.analyze(graph, output=buf, loop=5, threshold=0)

        res = defaultdict(list)
        for line in buf.getvalue().split("\n"):
            if not line:
                continue
            obj = json.loads(line)
            res[obj["iteration"]].append(obj)

        self.assertIn(0, res)
        self.assertIn(5, res)
        self.assertNotIn(6, res)
        self.assertIn("final", res)
def plot_learning_curve(data_fn):
    import pylab

    d = helper.load(data_fn)
    print d

    n_dcd = [t for i, t in enumerate(d["num_xt"]) if d["time"][0][i] != 0]
    n_mtk = [t for i, t in enumerate(d["num_xt"]) if d["time"][1][i] != 0]
    t_dcd = [t for t in d["time"][0] if t != 0]
    t_mtk = [t for t in d["time"][1] if t != 0]

    pylab.plot(n_mtk, t_mtk, "-o", linewidth=0.5, alpha=0.6,
               label="baseline MTK", color="red")
    pylab.plot(n_dcd, t_dcd, "-o", linewidth=0.5, alpha=0.6,
               label="proposed DCD", color="blue")

    pylab.yscale("log")
    # pylab.xscale("log")
    pylab.xlabel("number of training examples")
    pylab.ylabel("training time (s)")
    pylab.legend(loc=2)
    pylab.grid(True)
    pylab.show()
import numpy as np
import argparse
import torch  # needed below for torch.load

import helper

parser = argparse.ArgumentParser(description='Image Classifier')
parser.add_argument('--inp_image', type=str, default='flowers/valid/1/image_06755.jpg',
                    help='Path to dataset directory')
parser.add_argument('--checkpoint', type=str, default='trained1.pth', help='Checkpoint')
parser.add_argument('--gpu', type=str, default='cpu', help='GPU')
parser.add_argument('--json_class', type=str, default='cat_to_name.json',
                    help='JSON of key value')
parser.add_argument('--top_k', type=int, default=5, help='Top k classes and probabilities')
args = parser.parse_args()

class_to_name = helper.load_class(args.json_class)
model = helper.load(args.checkpoint)
print(model)
vals = torch.load(args.checkpoint)

image = helper.process_image(args.inp_image)
helper.imshow(image)

probs, classes = helper.predict(args.inp_image, model, args.top_k, args.gpu)
print(probs)
print(classes)
helper.display_image(args.inp_image, class_to_name, classes, probs)
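# Note: helper.load(args.checkpoint) above rebuilds the model from a saved
# checkpoint but its code is not shown. The sketch below is one plausible
# implementation for a torchvision-based classifier; the checkpoint keys
# ('arch', 'classifier', 'state_dict', 'class_to_idx') are assumptions, not
# the actual helper API.
import torch
from torchvision import models

def load(checkpoint_path):
    # Rebuild the network from a saved checkpoint dictionary.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model = getattr(models, checkpoint.get('arch', 'vgg16'))(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False  # keep the feature extractor frozen
    model.classifier = checkpoint['classifier']
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    return model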
print("add_to_group <last name> <first name> <group name>") print("help\t-prints this message") def get_contact(last): cont = thebook.search('last', last) if(len(cont) == 0): print("No contact with last name '{0}' found".format(last)) return None else: return(cont[0]) if(len(sys.argv) < 2): print("Need a contact book .yaml file") sys.exit() data = h.load(sys.argv[1]) thebook = Book(data, 'Book 1') run = True while(run): command_string = input('> ') args = command_string.split(' ') args.append("") if(args[0] == 'add' and args[1] == 'contact'): args[1] = args [2] args[0] = args [3] args[2] = input('home:') args[3] = input('work:')
def main():
    modules = load("data/d1.txt")
    return sum(calculate_fuel(module_mass) for module_mass in modules)
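# Note: load() and calculate_fuel() used by main() above are not shown. The
# sketch below is an assumption based on the usual "fuel from module mass"
# formula (floor(mass / 3) - 2); the project's real helpers may differ.
def load(path):
    # Read one integer module mass per non-empty line.
    with open(path) as f:
        return [int(line) for line in f if line.strip()]

def calculate_fuel(module_mass):
    # Fuel required to launch a module of the given mass.
    return module_mass // 3 - 2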
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]], dtype=float64)
B = array([1, 1, 1, -1, -1, -1], dtype=float64)

# ... of type Real, LongInt and Byte
feats_train = RealFeatures(A.transpose())
kernel = GaussianKernel(feats_train, feats_train, 1.0)
kernel.io.set_loglevel(MSG_DEBUG)

lab = Labels(B)
svm = SVMLight(1, kernel, lab)
svm.train()

helper.save("/tmp/awesome_svm", svm)
svm = helper.load("/tmp/awesome_svm")
svm.train()

# sys.exit(0)

run = expenv.Run.get(1010)
# run = expenv.Run.get(974)
dat = run.get_train_data()

print dat.keys()
d = dat["thaliana"]
subset_size = 20

examples = [i.example for i in d[0:subset_size]]
labels = [i.label for i in d[0:subset_size]]
monoClk_last = 0
AFKTimer = 0
user_AFK = False
ani = 0
lastFrame = 0
frame = False
randomEvent = 0
xsteps = 1
ysteps = 1
nextXMove = 1
nextYMove = 1

# animation frame sequences
idle = [0, 2, 0, 2, 0, 2]
error = [0, 4, 0, 4, 0, 4]
dance = [0, 1, 0, 1, 0, 1, 0, 3, 0, 3, 0, 3, 2, 1, 2, 3, 2, 3, 0, 2, 0, 2]

companbot_x = companion.companbot(1, 0, 0, 100, 10, 1, 1)
companbot_x = helper.load(companbot_x)

# Input eater
print("Eating Inputs")
time.sleep(0.6)

# Main Loop
while True:
    try:
        buttons = minitft.buttons
        if not user_AFK:
            minitft.display.show(splash)
            minitft.backlight = 0.2
            text_area.text = "Hunger:" + str(companbot_x.hunger) + \
                " Level:" + str(companbot_x.lvl) + " Exp:" + str(companbot_x.xp)
            digest(companbot_x, False)
        self.tag = None
        self.stage = None


if __name__ == "__main__":
    log("start runTest")
    Env = EnvClass()
    Env.commit = os.environ['TRAVIS_COMMIT']
    Env.commitRange = os.environ['TRAVIS_COMMIT_RANGE']
    Env.branch = os.environ['TRAVIS_BRANCH']
    Env.pullRequest = os.environ['TRAVIS_PULL_REQUEST']
    Env.pullRequestBranch = os.environ['TRAVIS_PULL_REQUEST_BRANCH']
    Env.tag = os.environ['TRAVIS_TAG']
    Env.stage = os.environ['TRAVIS_BUILD_STAGE_NAME']

    config = helper.load(ConfigFile)
    files = gitDiff.getFiles(Env)
    log("changed files: " + str(files))

    scripts = []
    if files is None:
        scripts = helper.getAllScripts(config)
    else:
        scripts = helper.getScripts(config, files)

    res = helper.execScripts(scripts)
    if res:
        exit(0)
    else:
        exit(1)
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """
    assert(param.base_similarity >= 1)

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=False)

    # create shogun data objects
    base_wdk = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)

    # create normalizer
    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    # load hard-coded task-similarity
    task_similarity = helper.load("/fml/ag-raetsch/home/cwidmer/svn/projects/alt_splice_code/src/task_sim_tis.bz2")

    # set similarity
    similarities = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))
    for (i, task_name_lhs) in enumerate(data.get_task_names()):
        # max_value_row = max(task_similarity.get_row(task_name_lhs))
        max_value_row = 1.0
        for (j, task_name_rhs) in enumerate(data.get_task_names()):
            similarity = task_similarity.get_value(task_name_lhs, task_name_rhs) / max_value_row
            normalizer.set_task_similarity(i, j, similarity)
            similarities[i, j] = similarity

    pprint.pprint(similarities)

    # set normalizer
    # print "WARNING MTK disabled!!!!!!!!!!!!!!!!!!!!!"
    base_wdk.set_normalizer(normalizer)
    base_wdk.init_normalizer()

    # set up svm
    param.flags["svm_type"] = "svmlight"  # fix svm type
    svm = shogun_factory.create_svm(param, base_wdk, lab)

    # make sure these parameters are set correctly
    # print "WARNING MTK WONT WORK WITH THESE SETTINGS!!!!!!!!!!!!!!!!!!!!!"
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    assert svm.get_linadd_enabled() == False, "linadd should be disabled"
    assert svm.get_batch_computation_enabled() == False, "batch compute should be disabled"

    # start training
    svm.train()

    # save additional information
    self.additional_information["svm objective"] = svm.get_objective()
    self.additional_information["num sv"] = svm.get_num_support_vectors()
    # self.additional_information["distances"] = distances
    self.additional_information["similarities"] = similarities

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in data.get_task_names():
        task_num = data.name_to_id(task_name)
        # save svm and task_num
        svms[task_name] = (task_num, svm)

    return svms
import torch.nn.functional as F
import torch.optim as optim

from Testmodel import TestModel
from dataset import Dataset
import helper as H

testmodel = TestModel(input_dims=[2], output_dims=2, lr=0.01,
                      num_layers=4, num_nodes=6)

dataset = Dataset()
X, y = dataset.preprocess()
X_train, X_test, y_train, y_test = dataset.split(X, y, fraction=0.2)
X_train, X_test = dataset.scale(X_train, X_test)
train_loader, test_loader = H.load(X_train, X_test, y_train, y_test)

acc, loss = testmodel.train(train_loader)
print(acc, loss)
acc, loss = testmodel.test(test_loader)
print(acc, loss)

'''
dataset = Dataset()
X, y = dataset.preprocess()
X_train, X_test, y_train, y_test = dataset.split(X, y, fraction=0.2)
X_train, X_test = dataset.scale(X_train, X_test)
train_loader, test_loader = H.load(X_train, X_test, y_train, y_test)
input_dims = [X.shape[1]]
output_dims = len(np.unique(y))
testmodel = TestModel.TestModel(input_dims=input_dims, output_dims=output_dims,
def test_point_wise(): train, dev, test = load(FLAGS.data, filter=FLAGS.clean) # wiki # train, test, dev = load(FLAGS.data, filter=FLAGS.clean) #trec q_max_sent_length = max(map(lambda x: len(x), test['question'].str.split())) a_max_sent_length = 2 print(q_max_sent_length) print(len(train)) print('train question unique:{}'.format(len(train['question'].unique()))) print('train length', len(train)) print('test length', len(test)) print('dev length', len(dev)) alphabet, embeddings = prepare([train, test, dev], max_sent_length=q_max_sent_length, dim=FLAGS.embedding_dim, is_embedding_needed=True, fresh=True) print('alphabet:', len(alphabet)) with tf.Graph().as_default(): with tf.device("/gpu:0"): # session_conf = tf.ConfigProto( # allow_soft_placement=FLAGS.allow_soft_placement, # log_device_placement=FLAGS.log_device_placement) session_conf = tf.ConfigProto() session_conf.allow_soft_placement = FLAGS.allow_soft_placement session_conf.log_device_placement = FLAGS.log_device_placement session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) now = int(time.time()) timeArray = time.localtime(now) timeStamp1 = time.strftime("%Y%m%d%H%M%S", timeArray) timeDay = time.strftime("%Y%m%d", timeArray) print(timeStamp1) with sess.as_default(), open(precision, "w") as log: log.write(str(FLAGS.__flags) + '\n') # train,test,dev = load("trec",filter=True) # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True) cnn = BiLSTM(max_input_left=q_max_sent_length, vocab_size=len(alphabet), embeddings=embeddings, embedding_size=FLAGS.embedding_dim, batch_size=FLAGS.batch_size, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda, is_Embedding_Needed=True, trainable=FLAGS.trainable, overlap_needed=FLAGS.overlap_needed, position_needed=FLAGS.position_needed, pooling=FLAGS.pooling, hidden_num=FLAGS.hidden_num, extend_feature_dim=FLAGS.extend_feature_dim) cnn.build_graph() # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) saver = tf.train.Saver(tf.global_variables(), max_to_keep=20) sess.run(tf.global_variables_initializer()) map_max = 0.65 for i in range(FLAGS.num_epochs): datas = batch_gen_with_point_wise(train, alphabet, FLAGS.batch_size, q_len=q_max_sent_length, a_len=a_max_sent_length) for data in datas: feed_dict = { cnn.question: data[0], cnn.input_y: data[1], cnn.q_position: data[2], cnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, loss, accuracy = sess.run( [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g} ".format( time_str, step, loss, accuracy)) now = int(time.time()) timeArray = time.localtime(now) timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray) timeDay = time.strftime("%Y%m%d", timeArray) print(timeStamp1) print(timeStamp) predicted = predict(sess, cnn, train, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) predicted_label = np.argmax(predicted, 1) map_mrr_train = evaluation.evaluationBypandas_f1_acc( train, predicted[:, -1], predicted_label) predicted_test = predict(sess, cnn, test, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) predicted_label = np.argmax(predicted_test, 1) map_mrr_test = 
evaluation.evaluationBypandas_f1_acc( test, predicted_test[:, -1], predicted_label) if map_mrr_test[0] > map_max: map_max = map_mrr_test[0] timeStamp = time.strftime("%Y%m%d%H%M%S", time.localtime(int(time.time()))) folder = 'runs/' + timeDay out_dir = folder + '/' + timeStamp + \ '__' + FLAGS.data + str(map_mrr_test[0]) if not os.path.exists(folder): os.makedirs(folder) #save_path = saver.save(sess, out_dir) print("{}:train epoch:map mrr {}".format(i, map_mrr_train)) print("{}:test epoch:map mrr {}".format(i, map_mrr_test)) line2 = " {}:epoch: map_test{}".format(i, map_mrr_test) log.write(line2 + '\n') log.flush() log.close()
def test_pair_wise(dns=FLAGS.dns): train, test, dev = load(FLAGS.data, filter=FLAGS.clean) # train = train[:10000] # test = test[:10000] # dev = dev[:10000] # submit = submit[:1000] q_max_sent_length = max( map(lambda x: len(x), train['question'].str.split())) a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split())) print 'q_question_length:{} a_question_length:{}'.format( q_max_sent_length, a_max_sent_length) print 'train question unique:{}'.format(len(train['question'].unique())) print 'train length', len(train) print 'test length', len(test) print 'dev length', len(dev) alphabet, embeddings = prepare([train, test, dev], dim=FLAGS.embedding_dim, is_embedding_needed=True, fresh=FLAGS.fresh) # alphabet,embeddings = prepare_300([train,test,dev]) print 'alphabet:', len(alphabet) with tf.Graph().as_default(), tf.device("/gpu:" + str(FLAGS.gpu)): # with tf.device("/cpu:0"): session_conf = tf.ConfigProto() session_conf.allow_soft_placement = FLAGS.allow_soft_placement session_conf.log_device_placement = FLAGS.log_device_placement session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) with sess.as_default(), open(precision, "w") as log: log.write(str(FLAGS.__flags) + '\n') folder = 'runs/' + timeDay + '/' + timeStamp + '/' out_dir = folder + FLAGS.data if not os.path.exists(folder): os.makedirs(folder) # train,test,dev = load("trec",filter=True) # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True) print "start build model" cnn = QA_RNN_extend(max_input_left=q_max_sent_length, max_input_right=a_max_sent_length, batch_size=FLAGS.batch_size, vocab_size=len(alphabet), embedding_size=FLAGS.embedding_dim, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, dropout_keep_prob=FLAGS.dropout_keep_prob, embeddings=embeddings, l2_reg_lambda=FLAGS.l2_reg_lambda, overlap_needed=FLAGS.overlap_needed, learning_rate=FLAGS.learning_rate, trainable=FLAGS.trainable, extend_feature_dim=FLAGS.extend_feature_dim, pooling=FLAGS.pooling, position_needed=FLAGS.position_needed, conv=FLAGS.conv) cnn.build_graph() saver = tf.train.Saver(tf.global_variables(), max_to_keep=20) train_writer = tf.summary.FileWriter(log_dir + '/train', sess.graph) test_writer = tf.summary.FileWriter(log_dir + '/test') # Initialize all variables print "build over" sess.run(tf.global_variables_initializer()) print "variables_initializer" map_max = 0.65 for i in range(FLAGS.num_epochs): if FLAGS.dns == True: samples = dns_sample(train, alphabet, q_max_sent_length, a_max_sent_length, sess, cnn, FLAGS.batch_size, neg_sample_num=10) datas = batch_gen_with_pair_dns(samples, FLAGS.batch_size) print 'load dns datas' for data in datas: feed_dict = { cnn.question: data[0], cnn.answer: data[1], cnn.answer_negative: data[2] } _, step, loss, accuracy, score12, score13 = sess.run([ cnn.train_op, cnn.global_step, cnn.loss, cnn.accuracy, cnn.score12, cnn.score13 ], feed_dict) time_str = datetime.datetime.now().isoformat() print( "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}" .format(time_str, step, loss, accuracy, np.mean(score12), np.mean(score13))) line = "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}".format( time_str, step, loss, accuracy, np.mean(score12), np.mean(score13)) else: d = get_overlap_dict(train, alphabet, q_len=q_max_sent_length, a_len=a_max_sent_length) datas = batch_gen_with_pair_overlap( train, alphabet, FLAGS.batch_size, q_len=q_max_sent_length, a_len=a_max_sent_length, fresh=FLAGS.fresh, 
overlap_dict=d) print "load data" for data in datas: feed_dict = { cnn.question: data[0], cnn.answer: data[1], cnn.answer_negative: data[2], cnn.q_pos_overlap: data[3], cnn.q_neg_overlap: data[4], cnn.a_pos_overlap: data[5], cnn.a_neg_overlap: data[6], cnn.q_position: data[7], cnn.a_pos_position: data[8], cnn.a_neg_position: data[9] } _, summary, step, loss, accuracy, score12, score13 = sess.run( [ cnn.train_op, cnn.merged, cnn.global_step, cnn.loss, cnn.accuracy, cnn.score12, cnn.score13 ], feed_dict) train_writer.add_summary(summary, i) time_str = datetime.datetime.now().isoformat() print( "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}" .format(time_str, step, loss, accuracy, np.mean(score12), np.mean(score13))) line = "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}".format( time_str, step, loss, accuracy, np.mean(score12), np.mean(score13)) # print loss if i % 1 == 0: predicted_dev = predict(sess, cnn, dev, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) map_mrr_dev = evaluation.evaluationBypandas( dev, predicted_dev) predicted_test = predict(sess, cnn, test, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) map_mrr_test = evaluation.evaluationBypandas( test, predicted_test) print "{}:epoch:dev map mrr {}".format(i, map_mrr_dev) print "{}:epoch:test map mrr {}".format(i, map_mrr_test) line = " {}:epoch: map_dev{}-------map_mrr_test{}".format( i, map_mrr_dev[0], map_mrr_test) if map_mrr_dev[0] > map_max: map_max = map_mrr_dev[0] # timeStamp = time.strftime("%Y%m%d%H%M%S", time.localtime(int(time.time()))) save_path = saver.save(sess, out_dir) print "Model saved in file: ", save_path log.write(line + '\n') log.flush() print 'train over' saver.restore(sess, out_dir) predicted = predict(sess, cnn, train, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) train['predicted'] = predicted train['predicted'].to_csv('train.QApair.TJU_IR_QA2017_train.score', index=False, sep='\t') map_mrr_train = evaluation.evaluationBypandas(train, predicted) predicted_dev = predict(sess, cnn, dev, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) dev['predicted'] = predicted_dev dev['predicted'].to_csv('train.QApair.TJU_IR_QA2017_dev.score', index=False, sep='\t') map_mrr_dev = evaluation.evaluationBypandas(dev, predicted_dev) predicted_test = predict(sess, cnn, test, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) test['predicted'] = predicted_test test['predicted'].to_csv('train.QApair.TJU_IR_QA2017.score', index=False, sep='\t') map_mrr_test = evaluation.evaluationBypandas(test, predicted_test) print 'map_mrr train', map_mrr_train print 'map_mrr dev', map_mrr_dev print 'map_mrr test', map_mrr_test log.write(str(map_mrr_train) + '\n') log.write(str(map_mrr_test) + '\n') log.write(str(map_mrr_dev) + '\n') predict(sess, cnn, train[:100], alphabet, 20, q_max_sent_length, a_max_sent_length)
import constants as C
import torch as T
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# assumed module layout, matching the companion test script in this collection
from dataset import Dataset
from model import Model  # assumption; adjust to the project's actual Model module
import helper as H

use_cuda = T.cuda.is_available()
device = T.device("cuda:0" if use_cuda else "cpu")

dataset = Dataset()
X, y = dataset.preprocess()
X_train, X_test, y_train, y_test = dataset.split(X, y, fraction=0.3)
X_train, X_test = dataset.scale(X_train, X_test)
trainloader, testloader = H.load(X_train, X_test, y_train, y_test)

mVar = Model(0.01, [12], 2, [8, 6, 6], ['relu', 'relu', 'relu'],
             trainloader, testloader).to(device)

train_acc, train_loss = mVar.train()
test_acc, test_loss = mVar.test()
print(train_acc, train_loss)
print(test_acc, test_loss)

mVar.initialise([8, 6])
mVar = mVar.to(device)
train_acc, train_loss = mVar.train()
test_acc, test_loss = mVar.test()
print(train_acc, train_loss)
print(test_acc, test_loss)
def test_point_wise(): train, test, dev = load(FLAGS.data, filter=FLAGS.clean) train = train.fillna('') test = test.fillna('') dev = dev.fillna('') # submit = submit.fillna('') q_max_sent_length = max( map(lambda x: len(x), train['question'].str.split())) a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split())) # train = train[:1000] # test = test[:1000] # dev = dev[:1000] # submit = dev[:100] print 'train question unique:{}'.format(len(train['question'].unique())) print 'train length', len(train) print 'test length', len(test) print 'dev length', len(dev) alphabet, embeddings = prepare([train, test, dev], dim=FLAGS.embedding_dim, is_embedding_needed=True, fresh=True) print 'alphabet:', len(alphabet) with tf.Graph().as_default(): with tf.device("/gpu:0"): # session_conf = tf.ConfigProto( # allow_soft_placement=FLAGS.allow_soft_placement, # log_device_placement=FLAGS.log_device_placement) session_conf = tf.ConfigProto() session_conf.allow_soft_placement = FLAGS.allow_soft_placement session_conf.log_device_placement = FLAGS.log_device_placement session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) with sess.as_default(), open(precision, "w") as log: log.write(str(FLAGS.__flags) + '\n') # train,test,dev = load("trec",filter=True) # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True) cnn = QA(max_input_left=q_max_sent_length, max_input_right=a_max_sent_length, vocab_size=len(alphabet), embedding_size=FLAGS.embedding_dim, batch_size=FLAGS.batch_size, embeddings=embeddings, dropout_keep_prob=FLAGS.dropout_keep_prob, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda, is_Embedding_Needed=True, trainable=FLAGS.trainable, overlap_needed=FLAGS.overlap_needed, position_needed=FLAGS.position_needed, pooling=FLAGS.pooling, extend_feature_dim=FLAGS.extend_feature_dim) cnn.build_graph() # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) starter_learning_rate = 0.001 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.96) optimizer = tf.train.AdamOptimizer(learning_rate) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) saver = tf.train.Saver(tf.global_variables(), max_to_keep=20) # Initialize all variables sess.run(tf.global_variables_initializer()) # seq_process(train, alphabet) # seq_process(test, alphabet) map_max = 0.65 for i in range(30): d = get_overlap_dict(train, alphabet, q_len=q_max_sent_length, a_len=a_max_sent_length) datas = batch_gen_with_point_wise(train, alphabet, FLAGS.batch_size, overlap_dict=d, q_len=q_max_sent_length, a_len=a_max_sent_length) for data in datas: feed_dict = { cnn.question: data[0], cnn.answer: data[1], cnn.input_y: data[2], cnn.q_overlap: data[3], cnn.a_overlap: data[4], cnn.q_position: data[5], cnn.a_position: data[6] } _, step, loss, accuracy, pred, scores, see = sess.run([ train_op, global_step, cnn.loss, cnn.accuracy, cnn.predictions, cnn.scores, cnn.see ], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g} ".format( time_str, step, loss, accuracy)) # print loss # predicted = predict(sess,cnn,train,alphabet,FLAGS.batch_size,q_max_sent_length,a_max_sent_length) # map_mrr_train = evaluation.evaluationBypandas(train,predicted[:,-1]) predicted = predict(sess, cnn, dev, alphabet, FLAGS.batch_size, q_max_sent_length, 
a_max_sent_length) map_mrr_dev = evaluation.evaluationBypandas( dev, predicted[:, -1]) predicted_test = predict(sess, cnn, test, alphabet, FLAGS.batch_size, q_max_sent_length, a_max_sent_length) map_mrr_test = evaluation.evaluationBypandas( test, predicted_test[:, -1]) if map_mrr_dev[0] > map_max: map_max = map_mrr_dev[0] timeStamp = time.strftime("%Y%m%d%H%M%S", time.localtime(int(time.time()))) folder = 'runs/' + timeDay out_dir = folder + '/' + timeStamp + '__' + FLAGS.data + str( map_mrr_dev[0]) if not os.path.exists(folder): os.makedirs(folder) save_path = saver.save(sess, out_dir) print "Model saved in file: ", save_path # predicted = predict(sess,cnn,dev,alphabet,FLAGS.batch_size,q_max_sent_length,a_max_sent_length) # map_mrr_dev = evaluation.evaluationBypandas(dev,predicted[:,-1]) # map_mrr_train = evaluation.evaluationBypandas(train,predicted_train[:,-1]) # print evaluation.evaluationBypandas(train,predicted_train[:,-1]) # print "{}:train epoch:map mrr {}".format(i,map_mrr_train) print "{}:dev epoch:map mrr {}".format(i, map_mrr_dev) print "{}:test epoch:map mrr {}".format(i, map_mrr_test) # line = " {}:epoch: map_train{}----map_test{}----map_dev{}".format(i,map_mrr_train[0],map_mrr_test[0],map_mrr_dev[0]) line = " {}:epoch: map_dev{}----map_test{}".format( i, map_mrr_dev[0], map_mrr_test[0]) log.write(line + '\n') log.flush() log.close()
def dev_point_wise(): if FLAGS.data == 'TREC' or FLAGS.data == 'sst2': train, dev, test = load_trec_sst2(FLAGS.data) else: train, dev = load(FLAGS.data) q_max_sent_length = max( map(lambda x: len(x), train['question'].str.split())) print(q_max_sent_length) print(len(train)) print('train question unique:{}'.format(len(train['question'].unique()))) print('train length', len(train)) print('dev length', len(dev)) if FLAGS.data == 'TREC' or FLAGS.data == 'sst2': alphabet, embeddings = prepare([train, dev, test], max_sent_length=q_max_sent_length, dim=FLAGS.embedding_dim, is_embedding_needed=True, fresh=True) else: alphabet, embeddings = prepare([train, dev], max_sent_length=q_max_sent_length, dim=FLAGS.embedding_dim, is_embedding_needed=True, fresh=True) print('alphabet:', len(alphabet)) with tf.Graph().as_default(): with tf.device("/gpu:0"): session_conf = tf.ConfigProto() session_conf.allow_soft_placement = FLAGS.allow_soft_placement session_conf.log_device_placement = FLAGS.log_device_placement session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) now = int(time.time()) timeArray = time.localtime(now) timeStamp1 = time.strftime("%Y%m%d%H%M%S", timeArray) timeDay = time.strftime("%Y%m%d", timeArray) print(timeStamp1) with sess.as_default(), open(precision, "w") as log: log.write(str(FLAGS.__flags) + '\n') cnn = CNN(max_input_left=q_max_sent_length, vocab_size=len(alphabet), embeddings=embeddings, embedding_size=FLAGS.embedding_dim, batch_size=FLAGS.batch_size, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda, is_Embedding_Needed=True, trainable=FLAGS.trainable, dataset=FLAGS.data, extend_feature_dim=FLAGS.extend_feature_dim) cnn.build_graph() global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) sess.run(tf.global_variables_initializer()) acc_max = 0.0000 for i in range(FLAGS.num_epochs): datas = batch_gen_with_point_wise(train, alphabet, FLAGS.batch_size, q_len=q_max_sent_length) for data in datas: feed_dict = { cnn.question: data[0], cnn.input_y: data[1], cnn.q_position: data[2], cnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, loss, accuracy = sess.run( [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g} ".format( time_str, step, loss, accuracy)) predicted = predict(sess, cnn, train, alphabet, FLAGS.batch_size, q_max_sent_length) predicted_label = np.argmax(predicted, 1) acc_train = accuracy_score(predicted_label, train['flag']) predicted_dev = predict(sess, cnn, dev, alphabet, FLAGS.batch_size, q_max_sent_length) predicted_label = np.argmax(predicted_dev, 1) acc_dev = accuracy_score(predicted_label, dev['flag']) if acc_dev > acc_max: tf.train.Saver().save(sess, "model_save/model", write_meta_graph=True) acc_max = acc_dev print("{}:train epoch:acc {}".format(i, acc_train)) print("{}:dev epoch:acc {}".format(i, acc_dev)) line2 = " {}:epoch: acc{}".format(i, acc_dev) log.write(line2 + '\n') log.flush() acc_flod.append(acc_max) log.close()
def get_presvm(B=2.0): examples_presvm = [numpy.array([ 2.1788894 , 3.89163458, 5.55086917, 6.4022742 , 3.14964751, -0.4622959 , 5.38538904, 5.9962938 , 6.29690849]), numpy.array([ 2.1788894 , 3.89163458, 5.55086917, 6.4022742 , 3.14964751, -0.4622959 , 5.38538904, 5.9962938 , 6.29690849]), numpy.array([ 0.93099452, 0.38871617, 1.57968949, 1.25672527, -0.8123137 , 0.20786586, 1.378121 , 1.15598866, 0.80265343]), numpy.array([ 0.68705535, 0.15144113, -0.81306157, -0.7664577 , 1.16452945, -0.2712956 , 0.483094 , -0.16302007, -0.39094812]), numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497 , -0.30425755, 1.63157052, 1.15990427, 0.63801465]), numpy.array([ 0.68705535, 0.15144113, -0.81306157, -0.7664577 , 1.16452945, -0.2712956 , 0.483094 , -0.16302007, -0.39094812]), numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497 , -0.30425755, 1.63157052, 1.15990427, 0.63801465]), numpy.array([-0.98028302, -0.23974489, 2.1687206 , 1.99338824, -0.67070205, -0.33167281, 1.3500379 , 1.34915685, 1.13747975]), numpy.array([ 0.67109612, 0.12662017, -0.48254886, -0.49091898, 1.31522237, -0.34108933, 0.57832179, -0.01992828, -0.26581628]), numpy.array([ 0.3193611 , 0.44903416, 3.62187778, 4.1490827 , 1.58832961, 1.95583397, 1.36836023, 1.92521945, 2.41114998])] labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0] examples = [numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012, 0.14621013, -0.50415481, 0.32317317, -0.00317602, -0.21422637]), numpy.array([ 0.0511817 , -0.04226666, -0.30454651, -0.38759116, 0.31639514, 0.32558471, 0.49364473, 0.04515591, -0.06963456]), numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561 , 1.31488853, -0.33165365, 0.60176018, -0.00384946, -0.15603975]), numpy.array([ 0.59282756, -0.0039991 , -0.26028983, -0.26722552, 1.63314995, -0.51199338, 0.33340685, -0.0170519 , -0.19211039]), numpy.array([-0.18338766, -0.07783465, 0.42019824, 0.201753 , 2.01160098, 0.33326111, 0.75591909, 0.36631525, 0.1761829 ]), numpy.array([ 0.10273793, -0.02189574, 0.91092358, 0.74827973, 0.51882902, -0.1286531 , 0.64463658, 0.67468349, 0.55587266]), numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]), numpy.array([-0.22494375, -0.15492964, 0.28017737, 0.29794467, 0.96403895, 0.43880289, 0.08053425, 0.07456818, 0.12102371]), numpy.array([-0.18161417, -0.17692039, 0.19554942, -0.00785625, 1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]), numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256, 0.94205875, 0.40162798, 0.00327328, -0.24107393])] labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0] examples_test = [numpy.array([-0.45159799, -0.11401394, 1.28574573, 1.09144306, 0.92253119, -0.47230164, 0.77032486, 0.83047366, 0.74768906]), numpy.array([ 0.42613105, 0.0092778 , -0.78640296, -0.71632445, 0.41154244, 0.88380309, 0.19475759, -0.14195876, -0.30479425]), numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]), numpy.array([ 0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017, -0.1339607 , -0.1956124 , -0.2428358 , -0.25761213]), numpy.array([ 1.23679696, 0.18753081, -0.25593329, -0.12051991, 0.64976989, -0.17184101, 0.14951337, 0.01988587, -0.0356698 ]), numpy.array([ 1.03355002, 0.05316195, -0.97905368, -0.75482121, 0.28673776, 2.27142733, 0.02654739, -0.31109851, -0.44555277]), numpy.array([-0.53662325, 
-0.21434756, -0.12105795, -0.27531257, 0.66947047, 0.05474302, -0.00717455, -0.17700575, -0.22253444]), numpy.array([ 0.11272632, -0.12674826, -0.49736457, -0.51445609, 0.88518932, -0.51558669, -0.12000557, -0.32973613, -0.38488736]), numpy.array([ 0.8372111 , 0.06972199, -1.00454229, -0.79869642, 1.19376333, -0.40160273, -0.25122157, -0.46417918, -0.50234858]), numpy.array([-0.36325018, -0.12206184, 0.10525247, -0.15663416, 1.03616948, -0.51699463, 0.59566286, 0.35363369, 0.10545559])] ############################################# # compute pre-svm ############################################# # create real-valued features as first step examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64) examples_presvm = numpy.transpose(examples_presvm) feat_presvm = RealFeatures(examples_presvm) lab_presvm = Labels(numpy.array(labels_presvm)) wdk_presvm = LinearKernel(feat_presvm, feat_presvm) presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm) presvm_liblinear.set_max_iterations(10000) presvm_liblinear.set_bias_enabled(False) presvm_liblinear.train() #return presvm_liblinear #def get_da_svm(presvm_liblinear): ############################################# # compute linear term manually ############################################# examples = numpy.array(examples, dtype=numpy.float64) examples = numpy.transpose(examples) feat = RealFeatures(examples) lab = Labels(numpy.array(labels)) dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B) dasvm_liblinear.set_bias_enabled(False) dasvm_liblinear.train() helper.save("/tmp/svm", presvm_liblinear) presvm_pickle = helper.load("/tmp/svm") dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B) dasvm_pickle.set_bias_enabled(False) dasvm_pickle.train() helper.save("/tmp/dasvm", dasvm_liblinear) dasvm_pickle2 = helper.load("/tmp/dasvm") ############################################# # load test data ############################################# examples_test = numpy.array(examples_test, dtype=numpy.float64) examples_test = numpy.transpose(examples_test) feat_test = RealFeatures(examples_test) # check if pickled and unpickled classifiers behave the same out1 = dasvm_liblinear.classify(feat_test).get_labels() out2 = dasvm_pickle.classify(feat_test).get_labels() # compare outputs for i in xrange(len(out1)): try: assert(abs(out1[i]-out2[i])<= 0.001) except: print "(%.5f, %.5f)" % (out1[i], out2[i]) print "classification agrees."
import tensorflow as tf
import helper
import sys, skvideo.io, json, base64
import cv2
import numpy as np

batch_size = 300
num_classes = 3

image_input, keep_prob, logits, graph1 = helper.load('/tmp/frozen_graph_R3.pb')


def encode(array):
    array2 = cv2.resize(array, (800, 600))
    retval, buffer = cv2.imencode('.png', array2)
    return base64.b64encode(buffer).decode("utf-8")


file = sys.argv[-1]
video = skvideo.io.vread(file)

answer_key = {}
frame = 1

tester = tf.nn.softmax(logits)

with tf.Session(graph=graph1) as sess:
    for batch_i in range(0, len(video), batch_size):
        result = []
        for rgb_frame in video[batch_i:batch_i + batch_size]:
            image = cv2.resize(rgb_frame, (256, 192))
            result.append(image)
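# Note: helper.load('/tmp/frozen_graph_R3.pb') above returns the input
# placeholder, keep_prob, logits and the graph, but its code is not shown.
# Below is only a hedged sketch of a typical TF1 frozen-graph loader; the
# tensor names ('image_input:0', 'keep_prob:0', 'logits:0') are assumptions.
import tensorflow as tf

def load(graph_file):
    # Read the serialized GraphDef and import it into a fresh graph.
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(graph_file, 'rb') as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')
    # Look up the tensors the inference loop needs by (assumed) name.
    image_input = graph.get_tensor_by_name('image_input:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    logits = graph.get_tensor_by_name('logits:0')
    return image_input, keep_prob, logits, graph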
timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray)
timeDay = time.strftime("%Y%m%d", timeArray)

log_dir = 'log/' + timeDay
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

program = os.path.basename("sentence classification")
logger = logging.getLogger(program)

# %% read configuration
FLAGS = config.flags.FLAGS
opts = FLAGS.flag_values_dict()
for item in opts:
    logger.info('{} : {}'.format(item, opts[item]))

# %% ############## data loading #################
logger.info('load data ...........')
train, dev, test = helper.load(opts['data_dir'])
max_sent_length = max(map(lambda x: len(x), train['question'].str.split()))
max_sent_length = 33

# %% ############## data preprocessing ###############
alphabet, embeddings = helper.prepare([train, test, dev], dim=opts['embedding_dim'])

# %% ############## model #################
opts['embeddings'] = embeddings
opts['max_input_sentence'] = max_sent_length
opts['vocab_size'] = len(alphabet)
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
elif args.quiet:
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.CRITICAL)
else:
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING)

if args.simple:
    latex.configure(simpleMode=True)

if path.sep in args.source:
    sourcefolder = args.source.rsplit(path.sep, 1)[0]
else:
    sourcefolder = ""

notice = "% Warning: Editing this file directly can cause erratic behaviour in the compiler.\n"
sourcefile = helper.load(args.source)

if include_folder in args.destination:
    output = Scanner().scan(sourcefile)
    helper.write(args.destination, notice + output)
else:
    with cd(sourcefolder):
        output = Scanner().scan(sourcefile)
        helper.write(args.destination, notice + output)