def modify_dimer(self):
    """
    Build the input template for the ao overlap calc.

    Loads the interface data of the previous and the current step, edits a
    deep copy of ``self.template_cmp`` and stores it in ``self.template``.
    """
    paths = self.files
    previous = tools.load_data(paths['previous'])
    current = tools.load_data(paths['current'])

    # only the high model is required to calc. ao
    tpl = copy.deepcopy(self.template_cmp)

    # each entry of the 'charge' list is multiplied by two
    tpl['charge'] = [value * 2 for value in tpl['charge']]

    # molecular spec.: current-step molecule first, previous-step second
    tpl['mol'] = self.__merge_mols([current['mol'], previous['mol']])

    # routine line: HF with the basis named in the template
    level = "HF" + "/" + tpl['routine']['basis']
    keywords = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
    tpl['routine']['content'] = "#" + level + " " + keywords

    # the 'connect' entry is removed when the template carries one
    if 'connect' in tpl:
        del tpl['connect']

    # hand the edited copy back as the active template
    self.template = tpl
def modify_dimer(self):
    """
    Build the input template for the ao overlap calc.

    Loads the interface data of the previous and the current step, edits a
    deep copy of ``self.template_cmp`` and stores it in ``self.template``.
    """
    paths = self.files
    previous = tools.load_data(paths['previous'])
    current = tools.load_data(paths['current'])
    tpl = copy.deepcopy(self.template_cmp)

    # the 'charge' entry is multiplied by two
    tpl['charge'] = tpl['charge'] * 2

    # molecular spec.: current-step molecule first, previous-step second
    tpl['mol'] = self.__merge_mols([current['mol'], previous['mol']])

    # routine line: HF with the basis named in the template
    level = "HF" + "/" + tpl['routine']['basis']
    keywords = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
    tpl['routine']['content'] = "#" + level + " " + keywords

    # hand the edited copy back as the active template
    self.template = tpl
def modify(self): """ setup ao overlap calc. """ print "AO the Working Directory is:\n", os.getcwd() # load interface data from previous and current step i_pre = tools.load_data(self.files['previous']) i_cur = tools.load_data(self.files['current']) t = copy.deepcopy(self.template_cmp['high-model']['template']) # only the high model is required to calc. ao # modify charge & spin (kept) t['charge'] *= 2 # molecular spec. t['mol'] = self.merge_mol([i_cur['mol'], i_pre['mol']], t['region']) # routine routine = t['routine'] theory = "HF" basis = routine['basis'] print basis model = theory + "/" + basis other = "nosymm iop(2/12=3,3/33=1) guess=only pop=full" t['routine']['content'] = "# " + model + " " + other # delete connect if 'connect' in t.keys(): del t['connect'] # recover template self.template['high-model']['template'] = t return
def modify_dimer(self):
    """
    Build the input template for the ao overlap calc.

    Loads the interface data of the previous and the current step, edits a
    deep copy of ``self.template_cmp`` and stores it in ``self.template``.
    """
    paths = self.files
    previous = tools.load_data(paths['previous'])
    current = tools.load_data(paths['current'])
    tpl = copy.deepcopy(self.template_cmp)

    # the 'charge' entry is multiplied by two
    tpl['charge'] = tpl['charge'] * 2

    # molecular spec.: current-step molecule first, previous-step second
    tpl['mol'] = self.__merge_mols([current['mol'], previous['mol']])

    # routine line: HF with the basis named in the template
    level = "HF" + "/" + tpl['routine']['basis']
    keywords = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
    tpl['routine']['content'] = "#" + level + " " + keywords

    # hand the edited copy back as the active template
    self.template = tpl
def periodical_load(t1, interval):
    """
    Load data for the current timestamp and re-arm the timer.

    Calls ``load_data`` with the stringified ``get_ts()`` value, bumps the
    ``periodical_load.cnt`` function attribute and, once that counter is
    above 1, generates a training file for the span ``t1`` .. now. A
    ``Timer`` then schedules the next call with the current timestamp as
    the new ``t1``.
    """
    # t1 = str(get_ts() - interval)
    now = str(get_ts())
    load_data(now)

    periodical_load.cnt += 1
    if periodical_load.cnt > 1:
        arff_name = 'dataset_%s-%s.arff' % (t1, now)
        generate_training_file(os.path.join('Dataset', arff_name), t1, now)

    Timer(interval, periodical_load, [now, interval]).start()
def load(self):
    """
    load template.json and interface.json

    Deep copies of the loaded objects are stored in ``self.template`` and
    ``self.interface``.
    """
    template_path, interface_path = (self.files['template'],
                                     self.files['interface'])
    template_obj = tools.load_data(template_path)
    interface_obj = tools.load_data(interface_path)
    self.template = copy.deepcopy(template_obj)
    self.interface = copy.deepcopy(interface_obj)
def benchmark_solution():
    """
    Cross-validate the classifier returned by ``get_rf()`` on the raw
    feature columns of train.csv (every column except 'target' and 'id').
    """
    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    train_df['target'] = encoder.fit_transform(train_df['target'])

    excluded = ['target', 'id']
    feature_cols = [name for name in train_df.columns if name not in excluded]

    X_train = train_df[feature_cols]
    y = train_df['target']
    X_test = test_df[feature_cols]
    test_ids = test_df['id']

    print("benchmark solution")
    cross_v(get_rf(), X_train.values, y.values)  # 0.596256539386
def prepare(self):
    """
    load configure file

    Fills ``self.dyn`` from the 'dyn' file and ``self.config`` from the
    ``config.in`` file located next to this module.
    """
    # dynamic info.
    self.dyn = tools.load_data(self.files['dyn'])

    # gaussian directory structure info.
    # module_dir = os.path.split(os.path.realpath(sys.argv[0]))[0]
    module_dir = os.path.dirname(os.path.realpath(__file__))
    self.config = tools.load_data(module_dir + "/config.in")
    settings = self.config
    settings['root'] = os.getcwd()

    # attach dyn info in config vars
    settings.update(self.dyn['quantum'])
def prepare(self):
    """
    load configure file

    Fills ``self.dyn`` from the 'dyn' file and ``self.config`` from the
    ``config.in`` file located next to this module.
    """
    # dynamic info.
    self.dyn = tools.load_data(self.files['dyn'])

    # molpro directory structure info.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    self.config = tools.load_data(module_dir + "/config.in")
    settings = self.config
    settings['root'] = os.getcwd()

    # the 'quantum' section of the dyn info is merged into the config
    settings.update(self.dyn['quantum'])
def __init__(self, pop_size, elite_size, K, upper, lower, seconds):
    """
    Load the "cwk_train" / "cwk_test" data sets and store the settings.

    The targets of both sets are converted to lists; the population
    starts out empty.
    """
    train_x, train_y = load_data("cwk_train")
    test_x, test_y = load_data("cwk_test")
    self.train_data = train_x
    self.test_data = test_x
    self.train_targets = list(train_y)
    self.test_targets = list(test_y)

    self.pop_size, self.elite_size = pop_size, elite_size
    self.upper, self.lower = upper, lower
    self.seconds = seconds
    self.K = K
    self.population = []
def prepare(self):
    """
    load configure file

    Fills ``self.dyn`` from the 'dyn' file and ``self.config`` from the
    ``config.in`` file located next to this module.
    """
    # dynamic info.
    self.dyn = tools.load_data(self.files['dyn'])

    # gaussian directory structure info.
    # module_dir = os.path.split(os.path.realpath(sys.argv[0]))[0]
    module_dir = os.path.dirname(os.path.realpath(__file__))
    self.config = tools.load_data(module_dir + "/config.in")
    settings = self.config
    settings['root'] = os.getcwd()

    # attach dyn info in config vars
    settings.update(self.dyn['quantum'])
def __init__(self, inertia, cognitive, social, num_particles):
    """
    Load the "cwk_train" / "cwk_test" data sets, store the coefficients
    and create ``num_particles`` ``Particle`` instances.
    """
    train_x, train_y = load_data("cwk_train")
    self.train_data = train_x
    self.train_targets = list(train_y)

    test_x, test_y = load_data("cwk_test")
    self.test_data = test_x
    self.test_targets = list(test_y)

    self.num_particles = num_particles
    self.inertia = inertia
    self.cognitive = cognitive
    self.social = social

    # initialise population
    self.particles = [Particle() for _ in range(self.num_particles)]
def loadData(datatype):
    """
    Load one of the supported data sets together with its input size.

    Args:
        datatype: 'gtsrb_binary', 'cifar10_binary' or 'cifar10'.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test, input_shape)``; the
        first four values are what ``load_data(datatype, 2)`` returns and
        ``input_shape`` is the integer 3*48*48 (gtsrb) or 3*32*32 (cifar10).

    Raises:
        ValueError: if ``datatype`` is not one of the supported names.
            (Previously this case surfaced as an UnboundLocalError at the
            return statement.)
    """
    input_shapes = {
        'gtsrb_binary': 3 * 48 * 48,
        'cifar10_binary': 3 * 32 * 32,
        'cifar10': 3 * 32 * 32,
    }
    if datatype not in input_shapes:
        raise ValueError("unknown datatype: %r (expected one of %s)"
                         % (datatype, sorted(input_shapes)))

    x_train, x_test, y_train, y_test = load_data(datatype, 2)
    return x_train, x_test, y_train, y_test, input_shapes[datatype]
def loadData(datatype):
    """
    Load one of the supported data sets, reshaped to channel-first images.

    Args:
        datatype: 'gtsrb_binary' or 'cifar10_binary'.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test, input_shape)``.
        gtsrb images are reshaped to (-1, 3, 48, 48); cifar10 images are
        reshaped to (-1, 32, 32, 3), transposed to (-1, 3, 32, 32) and cast
        to float32. ``input_shape`` is 3*48*48 or 3*32*32 respectively.

    Raises:
        ValueError: if ``datatype`` is not one of the supported names.
    """
    # Names are compared with ``==``: ``is`` tests object identity and only
    # matched when the caller's string happened to be the interned literal.
    if datatype == 'gtsrb_binary':
        x_train, x_test, y_train, y_test = load_data('gtsrb_binary', 2)
        x_train = x_train.reshape((-1, 3, 48, 48))
        x_test = x_test.reshape((-1, 3, 48, 48))
        input_shape = 3 * 48 * 48
    elif datatype == 'cifar10_binary':
        x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)
        x_train = x_train.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        x_test = x_test.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        input_shape = 3 * 32 * 32
    else:
        raise ValueError("unknown datatype: %r" % (datatype,))
    return x_train, x_test, y_train, y_test, input_shape
def main_train(key, noise, divide_num=5):
    """Train at the given noise rate and return the accuracy and its standard deviation.

    For each of ``divide_num`` rounds the data is loaded with
    ``load_data(key, 0.30, noise)``, filtered by ``relative_density`` and
    scored with ``kernel_LR``. The first two accuracy slots are fixed to 0
    in this variant.

    Returns:
        (mean_acc, round_acc, round_accstd): the per-slot mean, the mean
        rounded to 4 decimals and the standard deviation rounded to 4
        decimals.
    """
    print(key, "数据集 ", "噪声率为:", noise, sep="")

    runs = []
    for _ in range(divide_num):
        train, test = load_data(key, 0.30, noise)
        klr_acc = 0
        crf_acc = 0
        # tune the RD threshold
        filtered_train = relative_density(train)  # RD
        rd_acc = kernel_LR(filtered_train, test)
        runs.append([klr_acc, crf_acc, rd_acc])
        print("klr:", klr_acc, "crf:", crf_acc, "rd:", rd_acc)

    mean_acc = np.mean(runs, axis=0)
    round_acc = np.round(mean_acc, 4)
    round_accstd = np.round(np.std(runs, axis=0), 4)

    print("KLR:%.4lf" % (round_acc[0]),
          "CRF:%.4lf" % (round_acc[1]),
          "RD:%.4lf" % (round_acc[2]),
          "KLR:%.4lf" % (round_accstd[0]),
          "CRF:%.4lf" % (round_accstd[1]),
          "RD:%.4lf" % (round_accstd[2]))
    print()
    return mean_acc, round_acc, round_accstd
def main_train(key, noise, divide_num=5):
    """Train at the given noise rate and return the accuracy and its standard deviation.

    For each of ``divide_num`` rounds the data is loaded with
    ``load_data(key, 0.30, noise)`` and scored with ``CRFNFL_SVM``. The
    first and third accuracy slots are fixed to 0 in this variant.

    Returns:
        (mean_acc, round_acc, round_accstd): the per-slot mean, the mean
        rounded to 4 decimals and the standard deviation rounded to 4
        decimals.
    """
    print(key, "数据集 ", "噪声率为:", noise, sep="")

    runs = []
    for _ in range(divide_num):
        train, test = load_data(key, 0.30, noise)
        ksvm_acc = 0
        crf_acc = CRFNFL_SVM(train, test)  # call the random forest
        rd_acc = 0
        runs.append([ksvm_acc, crf_acc, rd_acc])
        print("ksvm:", ksvm_acc, "crf:", crf_acc, "rd:", rd_acc)

    mean_acc = np.mean(runs, axis=0)
    round_acc = np.round(mean_acc, 4)
    round_accstd = np.round(np.std(runs, axis=0), 4)

    print("KSVM:%.4lf" % (round_acc[0]),
          "CRF:%.4lf" % (round_acc[1]),
          "RD:%.4lf" % (round_acc[2]),
          "KSVM:%.4lf" % (round_accstd[0]),
          "CRF:%.4lf" % (round_accstd[1]),
          "RD:%.4lf" % (round_accstd[2]))
    print()
    return mean_acc, round_acc, round_accstd
def load(self, filename="template.json"):
    """
    load template.json

    Returns whatever ``tools.load_data`` yields for ``filename``.
    """
    return tools.load_data(filename)
def run(self): """ raise the calc. code. """ # load interface file interface = tools.load_data(self.files['interface']) it = int(interface['parm']['i_time']) label_ZN = int(self.dyn['control']['label_ZN']) qm_method = int(self.dyn['control']['qm_method']) config = self.config # Start the QC calculations if qm_method == 2 or qm_method == 3: molpro_template(config) molpro_run(label_ZN, config) else: print "QM method : error: no such method", qm_method sys.exit(1) return
def run(self): """ raise the calc. code. """ # load interface file interface = tools.load_data(self.files['interface']) it = int(interface['parm']['i_time']) qm_method = int(self.dyn['control']['qm_method']) config = self.config # Start the QC calculations (CIS, TDHF, TDDFT) if qm_method == 11: # Do electronic structure calculation at time zero if it == 0: # call zero time # % make template: dump gjf in json format gau_template(config) # % call gaussian gau_zero(config) elif it > 0: gau_nonzero(config) print "now work dir:", os.getcwd() gau_overlap(config) nac = gau_nac(config) else: print "Error: keyword 'it':", it sys.exit(0) else: print "QM method : error: no such method", qm_method sys.exit(1) return
def load(self, filename="template.json"):
    """
    load template.json

    Returns whatever ``tools.load_data`` yields for ``filename``.
    """
    return tools.load_data(filename)
def run(self): """ raise the calc. code. """ # load interface file interface = tools.load_data(self.files['interface']) it = int(interface['parm']['i_time']) qm_method = int(self.dyn['control']['qm_method']) config = self.config # Start the QC calculations (CIS, TDHF, TDDFT) if qm_method == 11: # Do electronic structure calculation at time zero if it == 0: # call zero time # % make template: dump gjf in json format gms_template(config) # % call gaussian gms_zero(config) elif it > 0: gms_nonzero(config) print "now work dir:", os.getcwd() gms_overlap(config) gms_nac(config) else: print "Error: keyword 'it':", it sys.exit(0) else: print "QM method : error: no such method" , qm_method sys.exit(1) return
def main():
    """
    Print four top-5 rankings of Congress members by age at term start:
    oldest, youngest, oldest Rep. and youngest Dem.
    """
    def show_ranked(members):
        # one numbered line per member, numbering starts at 1
        for rank, member in enumerate(members, start=1):
            print(f'{rank}. {member.firstname} {member.lastname} who started in '
                  f'{member.termstart} at {member.age}')

    print("Analysis of the US Congress members from 1947 to 2014")
    print()

    data = tools.load_data()

    print("The 5 oldest members of Congress at the beginning of "
          "their mandate")
    show_ranked(tools.find_oldest(data)[:5])
    print()

    print("The 5 youngest members of Congress at the beginning of "
          "their mandate")
    show_ranked(tools.find_youngest(data)[:5])
    print()

    print("The 5 oldest members of Congress at the beginning of "
          "their mandate who are Rep.")
    show_ranked(tools.find_oldest_rep(data)[:5])
    print()

    print("The 5 youngest members of Congress at the beginning of "
          "their mandate who are Dem.")
    show_ranked(tools.find_youngest_dem(data)[:5])
def run(self): """ raise the calc. code. """ # load interface file interface = tools.load_data(self.files['interface']) it = int(interface['parm']['i_time']) label_ZN = int(self.dyn['control']['label_ZN']) qm_method = int(self.dyn['control']['qm_method']) config = self.config # Start the QC calculations (MRCI) if qm_method == 1: # % make template: dump input in json format mndo_template(config) # % call mndo print "now work dir:", os.getcwd() mndo_run(label_ZN, config) else: print "QM method : error: no such method", qm_method sys.exit(1) return
def plot_cor(data):
    """Plot pairwise correlations of features in the given dataset.

    Draws the absolute values of ``data.corr()`` as a matrix plot on a new
    12x12 figure, adds a colorbar and labels both axes with the column
    names of ``data``. Nothing is returned and the figure is not shown or
    saved here.
    """
    from matplotlib import cm
    cols = data.columns.tolist()
    fig = plt.figure(figsize=(12,12))
    ax = fig.add_subplot(111)
    # Plot absolute value of pairwise correlations since we don't
    # particularly care about the direction of the relationship,
    # just the strength of it
    cax = ax.matshow(data.corr().abs(), cmap=cm.YlOrRd)
    fig.colorbar(cax)
    # one tick per column, labelled with the column name
    ax.set_xticks(np.arange(len(cols)))
    ax.set_yticks(np.arange(len(cols)))
    ax.set_xticklabels(cols)
    ax.set_yticklabels(cols)
    ax.set_title("Correlation Matrix of Features")

# NOTE(review): the statements below follow a commented-out ``def main():``;
# they are laid out here as module-level script code. The collapsed source
# does not show their original indentation -- confirm against the file.
#def main():
train=load_data('train.csv')
# every column except the id and the target
cols = [col for col in train.columns if col not in ['id','target']]
# hand-picked subset of feature columns, plus the target column
features=['feat_34','feat_11','feat_40','feat_26','feat_60','feat_25','feat_86','feat_15','feat_90','feat_14','feat_42','feat_67','feat_62','feat_36','feat_24','target']
#xCol = 'target'
#for col in cols:
#    plotHist(train, col,xCol)
x_data=train[features]
#plot_cor(x_data)
y=train['target']
# NOTE(review): the mask is wrapped in an extra list (``y[[mask]]``);
# presumably ``y[y=='class1']`` was intended -- confirm.
class1=y[[y=='class1']]
def load(self):
    """
    load interface.json

    A deep copy of the loaded object is stored in ``self.interface``.
    """
    loaded = tools.load_data(self.files['interface'])
    self.interface = copy.deepcopy(loaded)
def run(self): """ raise the calc. code. """ # load interface file interface = tools.load_data(self.files['interface']) it = int(interface['parm']['i_time']) qm_method = int(self.dyn['quantum']['qm_method']) print "QM_METHOD: ", qm_method config = self.config # # make template Template(self.config) # Start the QC calculations if it == 0: firstStep(config) buildSOC(config) buildNAC(config) elif it > 0: nextStep(config) buildSOC(config) buildNAC(config) else: print "Error: keyword 'it':", it sys.exit(0) return
def feature_engineering_solution():
    """
    Cross-validate the classifier returned by ``get_rf()`` on the output
    of ``feature_engineering`` applied to the raw feature columns.
    """
    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_df['target'])
    train_df['target'] = encoder.transform(train_df['target'])

    excluded = ['target', 'id']
    raw_cols = [name for name in train_df.columns if name not in excluded]
    X_train = feature_engineering(train_df[raw_cols])
    X_test = feature_engineering(test_df[raw_cols])

    # keep every engineered column
    # std 0.607958003167 mean 0.615741311533
    feature_cols = list(X_train.columns)
    X_train = X_train[feature_cols]
    X_test = X_test[feature_cols]

    y = train_df['target']
    test_ids = test_df['id']

    print('feature_engineering_solution')
    cross_v(get_rf(), X_train.values, y.values)  # 0.600017926514
def load(self):
    """
    load template.json

    Two independent deep copies of the loaded object are stored in
    ``self.template`` and ``self.template_cmp``; the loaded object itself
    is returned.
    """
    loaded = tools.load_data(self.files['template'])
    self.template = copy.deepcopy(loaded)
    self.template_cmp = copy.deepcopy(loaded)
    return loaded
def main():
    """
    Main routine of PINK data preconditioning

    Parses the command line, loads the input (.npy or .bin), prints basic
    statistics, optionally rescales the data linearly to [0, 1] and
    optionally writes the result to an .npy or .bin file.

    Raises:
        RuntimeError: if the input or output file extension is neither
            'npy' nor 'bin'.
    """
    parser = argparse.ArgumentParser(description='PINK data preconditioning')
    parser.add_argument('data', help='Data input file (.npy or .bin)',
                        action=tools.check_extension({'npy', 'bin'}))
    parser.add_argument('-o', '--output', help='Data output file')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Be talkative')
    parser.add_argument(
        '-s', '--scale', action='store_true',
        help='Scale the input data to be within the range [0, 1]')
    args = parser.parse_args()

    # extension without the leading dot, computed once
    input_ext = os.path.splitext(args.data)[1][1:]
    if input_ext == "npy":
        data = np.load(args.data).astype(np.float32)
    elif input_ext == "bin":
        data = tools.load_data(args.data)
    else:
        # without this branch ``data`` would stay unbound below
        raise RuntimeError(
            'Unsupported input file extension: ' + input_ext)

    print('shape:             ', np.shape(data))
    print('size:              ', data.size)
    print('min value:         ', np.amin(data))
    print('max value:         ', np.amax(data))
    print('non-zero elements: ', np.count_nonzero(data))
    print('sparsity:          ', np.count_nonzero(data) / data.size)

    if args.scale:
        print('Data will be linearly scaled to be within the range [0.0, 1.0]')
        min_element = np.amin(data)
        max_element = np.amax(data)
        value_range = max_element - min_element
        # constant data has zero range; a zero factor maps it to all zeros
        # instead of dividing by zero and producing NaNs
        factor = 1 / value_range if value_range != 0 else 0.0
        print('min value: ', min_element)
        print('max value: ', max_element)
        print('factor: ', factor)
        data = (data - min_element) * factor
        print('min value: ', np.amin(data))
        print('max value: ', np.amax(data))

    if args.output:
        output_ext = os.path.splitext(args.output)[1][1:]
        if output_ext == "npy":
            np.save(args.output, data)
        elif output_ext == "bin":
            tools.save_data(args.output, data)
        else:
            # one formatted message instead of a two-element args tuple
            raise RuntimeError(
                'Unsupported output file extension: ' + output_ext)
        # announced only after the file has actually been written
        print('Output file written at', args.output)

    print('All done.')
def model_selection_solution():
    """
    Fit the classifier returned by ``get_tuned_gb()`` on the raw feature
    columns and write its class probabilities for test.csv as a
    submission file.
    """
    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_df['target'])
    train_df['target'] = encoder.transform(train_df['target'])

    excluded = ['target', 'id']
    feature_cols = [name for name in train_df.columns if name not in excluded]
    X_train = train_df[feature_cols]
    X_test = test_df[feature_cols]
    y = train_df['target']
    test_ids = test_df['id']

    print('gbrt_tuned_selection_solution')
    # cross_v(get_tuned_gb(),X_train.values,y.values)
    model = get_tuned_gb()
    model.fit(X_train, y)
    write_submission(test_ids, model.predict_proba(X_test),
                     'submissions/gbrt_tuned_selection_solution.csv')
def do_train():
    """
    Build the network, train it on ./data/train.txt and return the object
    produced by ``fit_generator``. An ``Evaluator`` built from
    ./data/val.txt is passed as the only callback.
    """
    training_set = load_data('./data/train.txt')
    validation_set = load_data('./data/val.txt')

    net = Net()
    model = net.get_model()
    callbacks = [Evaluator(validation_set, config.model_save_to, model,
                           net.CRF, net.NER)]

    batches = data_generator(training_set, config.batch_size)
    return model.fit_generator(batches.forfit(),
                               steps_per_epoch=len(batches),
                               epochs=config.epochs,
                               callbacks=callbacks)
def parameter_tuning_solution():
    """
    Cross-validate the classifier returned by ``get_tuned_rf()``, then fit
    a fresh one on the raw feature columns and write its class
    probabilities for test.csv as a submission file.
    """
    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_df['target'])
    train_df['target'] = encoder.transform(train_df['target'])

    excluded = ['target', 'id']
    feature_cols = [name for name in train_df.columns if name not in excluded]
    X_train = train_df[feature_cols]
    X_test = test_df[feature_cols]
    y = train_df['target']
    test_ids = test_df['id']

    print('parameter_tuning_solution800_6')
    cross_v(get_tuned_rf(), X_train.values, y.values)  # 0.546637992781

    model = get_tuned_rf()
    model.fit(X_train, y)
    write_submission(test_ids, model.predict_proba(X_test),
                     'submissions/parameter_tuning_solution800_6.csv')
def attack():
    """
    Run a boundary attack against the BNN ensemble on one cifar10 sample.

    Loads cifar10, predicts the test set with a ``BNN`` built from 100
    checkpoint files, saves predictions and labels with ``np.savetxt``,
    then generates an adversarial example for test sample 288 and prints
    its distances to the original as computed by ``utils.computeDist2``
    and ``utils.computeDistInf``, plus the elapsed time.
    """
    x_train, x_test, y_train, y_test = load_data('cifar10', 2)

    min_pixel_value = x_train.min()
    max_pixel_value = x_train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)

    s = time.time()

    # model = BNN(['../binary/checkpoints/cifar10_mlpbnn_approx_%d.h5' % (i) for i in range(100)])
    model = BNN([
        '../binary/checkpoints/cifar10_mlpbnn_approx_ep004_%d.h5' % (i)
        for i in range(100)
    ])

    pred_y = model.predict(x_test)
    print('pred_y: ', pred_y)

    np.savetxt('pred_y', pred_y)
    np.savetxt('y_test', y_test)
    # the last value used to be ``y[-1]``; no ``y`` exists in this function,
    # the labels are in ``y_test``
    print('pred_y[0], pred_y[288], pred_y[888], pred_y[1990], y[-1]',
          pred_y[0], pred_y[288], pred_y[888], pred_y[1990], y_test[-1])
    print('Accuracy: ', accuracy_score(y_true=y_test, y_pred=pred_y))

    # Create a model wrapper
    predictWrapper = modelWrapper(model)

    classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot,
                                    input_shape=(32 * 32 * 3, ),
                                    nb_classes=2,
                                    clip_values=(min_pixel_value,
                                                 max_pixel_value))

    print('----- generate adv data -----')
    # local renamed so it no longer shadows this function's own name
    boundary_attack = BoundaryAttack(estimator=classifier,
                                     targeted=False,
                                     delta=0.01,
                                     epsilon=0.01,
                                     max_iter=100,
                                     num_trial=100,
                                     sample_size=100,
                                     init_size=100)

    print('----- generate adv test data -----')
    x_test = x_test[288]
    # Input data shape should be 2D
    x_test = x_test.reshape((-1, 32 * 32 * 3))
    x_test_adv = boundary_attack.generate(x=x_test)

    print('x_test ', x_test)
    print('x_test_adv ', x_test_adv)

    dist2 = utils.computeDist2(x_test, x_test_adv)
    print('test data dist2: ', dist2)

    distInf = utils.computeDistInf(x_test, x_test_adv)
    print('test data distInf: ', distInf)

    print('Cost time: ', time.time() - s)
def get_standard_template(file_string, key_frame, action_type, action_num):
    """
    Fit a curve to the ``action_num`` frames preceding each key frame.

    Args:
        file_string: passed to ``load_data``.
        key_frame: a list storing the last frame and the first frame of
            two adjacent decomposed actions; only the even-indexed entries
            are read here.
        action_type: base name of the ``<action_type>.txt`` file that is
            opened for writing.
        action_num: number of frames taken before each key frame.

    NOTE(review): in the code visible here nothing is written to the
    opened file and the fit result is not used or returned -- the original
    function may continue beyond what was captured; confirm.
    """
    data = load_data(file_string)
    ang_data = trans_data(data)

    # the context manager closes the file, which was left open before
    with open(action_type + '.txt', 'w') as f:
        # floor division keeps the range() argument an int under both
        # Python 2 and Python 3 semantics
        for i in range(len(key_frame) // 2):
            last_frame = key_frame[2 * i]
            for series in ang_data:
                cur_W = curve_fit(series[last_frame - action_num:last_frame])
def load_coord(self):
    """
    load two set of coord. of the system

    The object loaded from the 'compare' file is stored under
    ``self.vars['geom']``.
    """
    compare_path = self.files['compare']
    geometry = tools.load_data(compare_path)
    self.vars['geom'] = geometry
def prepare(self):
    """
    first, prepare work dir; then, the necessary files.

    In the code of this method only the 'parm' section of the interface
    file is read and stored in ``self.dim``.
    """
    # load internal data.
    interface = tools.load_data(self.files['interface'])
    self.dim = interface['parm']
def load(self):
    """
    load template.json

    Two independent deep copies of the loaded object are stored in
    ``self.template`` and ``self.template_cmp``; the loaded object itself
    is returned.
    """
    loaded = tools.load_data(self.files['template'])
    self.template = copy.deepcopy(loaded)
    self.template_cmp = copy.deepcopy(loaded)
    return loaded
def rf_calibration_solution():
    """
    Wrap the classifier returned by ``get_tuned_rf()`` in an isotonic
    ``CalibratedClassifierCV`` (5 stratified folds), fit it on the raw
    feature columns and write its class probabilities for test.csv as a
    submission file.
    """
    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_df['target'])
    train_df['target'] = encoder.transform(train_df['target'])

    excluded = ['target', 'id']
    feature_cols = [name for name in train_df.columns if name not in excluded]
    X_train = train_df[feature_cols]
    X_test = test_df[feature_cols]
    y = train_df['target']
    test_ids = test_df['id']

    print('rf_calibration_solution')
    folds = cross_validation.StratifiedKFold(y, n_folds=5, random_state=42)
    calibration_method = 'isotonic'
    base_model = get_tuned_rf()
    calibrated = CalibratedClassifierCV(base_estimator=base_model,
                                        method=calibration_method,
                                        cv=folds)
    calibrated.fit(X_train, y)
    write_submission(test_ids, calibrated.predict_proba(X_test),
                     'submissions/rf_calibration_solution.csv')
def modify_td(self):
    """
    tddft input, nstates & root would be updated. for oniom feature

    Works on a deep copy of ``self.template_cmp``, takes the molecule from
    the current interface file and rewrites the routine line into up to
    three variants stored under ``t['routine']``:

    * 'content'       -- td/tda/cis(...) and "force" removed
    * 'es_content'    -- nstates/root updated, read from checkpoint
    * 'force_content' -- only written when the 0-based state index is 0

    The result is stored in ``self.template``.
    """
    check_suffix = " geom=AllCheck Guess=Read "

    # current interface file data.
    i_cur = tools.load_data(self.files['interface'])
    t = copy.deepcopy(self.template_cmp)

    # %charge & spin was kept. none was required.
    # %molecular spec.
    t['mol'] = i_cur['mol']

    # %routine
    # in dyn. interface, gs was 1, the first-es is 2, et al.
    # so there are n_es + 1 states
    # but gaussian, root=1 is first es.
    # so, x - 1 is ok
    n_state = int(i_cur['parm']['n_state']) - 1
    # coerced with int() like n_state, so a string value is handled too
    i_state = int(i_cur['parm']['i_state']) - 1

    content = t['routine']['content']
    pat = re.compile(r"nstates=(\d+)", re.IGNORECASE)
    content = re.sub(pat, "nstates=" + str(n_state), content)
    pat = re.compile(r"root=(\d+)", re.IGNORECASE)
    content = re.sub(pat, "root=" + str(i_state), content)

    # calc force routine of gs if required
    pat = re.compile(r"(td\(.+?\))|(tda\(.+?\))|(cis\(.+?\))", re.IGNORECASE)
    force_content = re.sub(pat, "", content)

    # gs single point calc.
    force_pat = re.compile("force", re.IGNORECASE)
    sp_content = re.sub(force_pat, "", force_content)

    # new content routine for es
    es_content = re.sub(force_pat, " ", content)

    # suppose the first occurrence of */* is like b3lyp/6-31G* style
    basis_pat = re.compile(r"\/[\S]+")
    es_content = re.sub(basis_pat, "/ChkBasis", es_content, count=1)
    force_content = re.sub(basis_pat, "/ChkBasis", force_content, count=1)

    # assign value
    t['routine']['content'] = sp_content
    t['routine']['es_content'] = content + check_suffix
    # NOTE(review): both assignments below are taken to belong to the
    # ``if``; the collapsed source does not show the indentation -- confirm.
    if i_state == 0:
        t['routine']['force_content'] = force_content + check_suffix
        t['routine']['es_content'] = es_content + check_suffix

    # recover template
    self.template = t
    return
def modify_dimer(self): """ setup ao overlap calc. """ # load interface data from previous and current step filename = self.files i_pre = tools.load_data(filename['previous']) i_cur = tools.load_data(filename['current']) t = copy.deepcopy(self.template_cmp) # $data section in gms. # molecular spec. mol = self.__merge_mols([i_cur['mol'], i_pre['mol']]) t['@DATA']['mol'] = mol t['@DATA']['title'] = "ONLY CHECK CALC, IGNORE WARINING.." # re-build $data section. # modify charge & spin (kept) # at_contrl = t['@CONTRL'] if 'MULT' in at_contrl: mol_spin = int(at_contrl['MULT']) else: mol_spin = 1 if "ICHARG" in at_contrl: mol_chrg = int(at_contrl['ICHARG']) * 2 else: mol_chrg = 0 t['CONTRL'] = " $contrl scftyp=rhf runtyp=energy exetyp=check\n \ NPRINT=3 MULT=%d ICHARG=%d NPRINT=3 $end\n" % (mol_spin, mol_chrg) # tddft is not necessary, so... if 'TDDFT' in at_contrl: del t['TDDFT'] # recover template self.template = t self.__geom_data() return
def modify_dimer(self): """ setup ao overlap calc. """ # load interface data from previous and current step filename = self.files i_pre = tools.load_data(filename['previous']) i_cur = tools.load_data(filename['current']) t = copy.deepcopy(self.template_cmp) # $data section in gms. # molecular spec. mol = self.__merge_mols([i_cur['mol'], i_pre['mol']]) t['@DATA']['mol'] = mol t['@DATA']['title'] = "ONLY CHECK CALC, IGNORE WARINING.." # re-build $data section. # modify charge & spin (kept) # at_contrl = t['@CONTRL'] if 'MULT' in at_contrl: mol_spin = int(at_contrl['MULT']) else: mol_spin = 1 if "ICHARG" in at_contrl: mol_chrg = int(at_contrl['ICHARG']) * 2 else: mol_chrg = 0 t['CONTRL'] = " $contrl scftyp=rhf runtyp=energy exetyp=check\n \ NPRINT=3 MULT=%d ICHARG=%d NPRINT=3 $end\n" % (mol_spin, mol_chrg) # tddft is not necessary, so... if 'TDDFT' in at_contrl: del t['TDDFT'] # recover template self.template = t self.__geom_data() return
def loadData(datatype):
    """
    Load one of the supported data sets together with its input size.

    Args:
        datatype: 'gtsrb_binary', 'cifar10_binary', 'cifar10' or
            'imagenet'.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test, input_shape)``.
        'cifar10' images are reshaped to (-1, 32, 32, 3), transposed to
        (-1, 3, 32, 32) and cast to float32; 'imagenet' images are reshaped
        to (-1, 3, 224, 224); the two binary sets are returned as loaded.
        ``input_shape`` is 3*48*48, 3*32*32 or 3*224*224.

    Raises:
        ValueError: if ``datatype`` is not one of the supported names.
            (Previously this case surfaced as an UnboundLocalError at the
            return statement.)
    """
    if datatype == 'gtsrb_binary':
        x_train, x_test, y_train, y_test = load_data('gtsrb_binary', 2)
        input_shape = 3 * 48 * 48
    elif datatype == 'cifar10_binary':
        x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)
        input_shape = 3 * 32 * 32
    elif datatype == 'cifar10':
        x_train, x_test, y_train, y_test = load_data('cifar10', 2)
        x_train = x_train.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        x_test = x_test.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        input_shape = 3 * 32 * 32
    elif datatype == 'imagenet':
        x_train, x_test, y_train, y_test = load_data('imagenet', 2)
        x_train = x_train.reshape((-1, 3, 224, 224))
        x_test = x_test.reshape((-1, 3, 224, 224))
        input_shape = 3 * 224 * 224
    else:
        raise ValueError("unknown datatype: %r" % (datatype,))
    return x_train, x_test, y_train, y_test, input_shape
def main():
    """
    Score the classifier from ``create_clf()`` with ``clf_score`` on the
    split training data, print the scores and plot them.
    """
    train_df = load_data('train.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    train_df['target'] = encoder.fit_transform(train_df['target'])

    excluded = ['target', 'id']
    feature_cols = [name for name in train_df.columns if name not in excluded]
    X_train, y_train = split_data(train_df, feature_cols)

    scores = clf_score(create_clf(), X_train[feature_cols], y_train)
    print(scores)

    # one tick per score, labelled with the score's index entry
    plt.plot(scores)
    plt.xticks(range(len(scores)), scores.index, fontsize=14, rotation=90)
    plt.show()
def __init__(self):
    """
    Load "interface.json", set up the file table and empty config/dyn
    containers, then start ``self.worker()``.
    """
    # interface_converter(filename = qm_interface)
    self.data = tools.load_data(filename="interface.json")

    self.files = {
        "interface": "interface.json",
        "dyn": "inp.json",
    }

    # global control variable, not very useful now for the case of turbomole
    self.config = {}
    self.dyn = {}

    self.worker()
def modify_td_gs(self):
    """
    tddft input, nstates & root would be updated.

    Works on a deep copy of ``self.template_cmp``, rebuilds the 'TDDFT'
    and 'CONTRL' texts with ``self.build_list``, stores the result in
    ``self.template2`` and finally calls ``self.__geom_data()``.
    """
    # current interface file data.
    current = tools.load_data(self.files['interface'])
    tpl = copy.deepcopy(self.template_cmp)

    # %charge & spin was kept. none was required.
    # %molecular spec.
    tpl['@DATA']['mol'] = current['mol']
    tpl['@DATA']['title'] = "energy & gradient.."

    # %routine
    # in dyn. interface, gs was 1, the first-es is 2, et al.
    # so there are n_es + 1 states
    # but gaussian, gamess, 'root=1' is first es.
    # so, x - 1 is ok
    parm = current['parm']
    n_state = int(parm['n_state']) - 1
    i_state = int(parm['i_state']) - 1
    if i_state != 0:
        print("excited state condition [ignored]")
    else:
        # index 0 is replaced by root 1
        i_state = 1

    # TDDFT SECTION
    tddft = tpl['@TDDFT']
    tddft['NSTATE'] = str(n_state)
    tddft['IROOT'] = str(i_state)
    tpl['TDDFT'] = " $TDDFT " + self.build_list(tddft)

    # CONTRL SECTION
    contrl = tpl['@CONTRL']
    contrl['RUNTYP'] = "ENERGY"
    # defaults are only filled in when the keys are absent
    if 'MULT' not in contrl:
        contrl['MULT'] = "1"
    if "ICHARG" not in contrl:
        contrl['ICHARG'] = "0"
    tpl['CONTRL'] = " $CONTRL " + self.build_list(contrl)

    # recover template
    self.template2 = tpl
    self.__geom_data()
def modify_td(self):
    """
    tddft input, nstates & root would be updated.

    Works on a deep copy of ``self.template_cmp``, takes the molecule from
    the current interface file and rewrites the routine line into up to
    three variants stored under ``t['routine']``:

    * 'content'       -- td/tda/cis(...) and "force" removed
    * 'es_content'    -- nstates/root updated, read from checkpoint
    * 'force_content' -- only written when the 0-based state index is 0

    The result is stored in ``self.template``.
    """
    check_suffix = " geom=AllCheck Guess=Read "

    # current interface file data.
    i_cur = tools.load_data(self.files['interface'])
    t = copy.deepcopy(self.template_cmp)

    # %charge & spin was kept. none was required.
    # %molecular spec.
    t['mol'] = i_cur['mol']

    # %routine
    # in dyn. interface, gs was 1, the first-es is 2, et al.
    # so there are n_es + 1 states
    # but gaussian, root=1 is first es.
    # so, x - 1 is ok
    n_state = int(i_cur['parm']['n_state']) - 1
    # coerced with int() like n_state, so a string value is handled too
    i_state = int(i_cur['parm']['i_state']) - 1

    content = t['routine']['content']
    pat = re.compile(r"nstates=(\d+)", re.IGNORECASE)
    content = re.sub(pat, "nstates=" + str(n_state), content)
    pat = re.compile(r"root=(\d+)", re.IGNORECASE)
    content = re.sub(pat, "root=" + str(i_state), content)

    # calc force routine of gs if required
    pat = re.compile(r"(td\(.+?\))|(tda\(.+?\))|(cis\(.+?\))", re.IGNORECASE)
    force_content = re.sub(pat, "", content)

    # gs single point calc.
    force_pat = re.compile("force", re.IGNORECASE)
    sp_content = re.sub(force_pat, "", force_content)

    # new content routine for es
    es_content = re.sub(force_pat, " ", content)

    # suppose the first occurrence of */* is like b3lyp/6-31G* style
    basis_pat = re.compile(r"\/[\S]+")
    es_content = re.sub(basis_pat, "/ChkBasis", es_content, count=1)
    force_content = re.sub(basis_pat, "/ChkBasis", force_content, count=1)

    # assign value
    t['routine']['content'] = sp_content
    t['routine']['es_content'] = content + check_suffix
    # NOTE(review): both assignments below are taken to belong to the
    # ``if``; the collapsed source does not show the indentation -- confirm.
    if i_state == 0:
        t['routine']['force_content'] = force_content + check_suffix
        t['routine']['es_content'] = es_content + check_suffix

    # recover template
    self.template = t
    return
def feature_selection_solution():
    """
    Cross-validate and fit the classifier returned by ``get_rf()`` on the
    engineered features minus a fixed set of dropped columns, then write
    its class probabilities for test.csv as a submission file.
    """
    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_df['target'])
    train_df['target'] = encoder.transform(train_df['target'])

    excluded = ['target', 'id']
    raw_cols = [name for name in train_df.columns if name not in excluded]
    X_train = feature_engineering(train_df[raw_cols])
    X_test = feature_engineering(test_df[raw_cols])

    # columns left out of the model
    dropped = ['mean', 'std', 'nonzero', 'feat_6', 'feat_82', 'feat_84']
    feature_cols = [name for name in X_train.columns if name not in dropped]
    X_train = X_train[feature_cols]
    X_test = X_test[feature_cols]
    print(X_train.columns)

    y = train_df['target']
    test_ids = test_df['id']

    print('feature_selection_solution')
    # mean 0.595288515439 std 0.593551044059 nonzero 0.597406303207
    # no fg 6 82 84 0.603600594376
    # 0.600058535601
    cross_v(get_rf(), X_train.values, y.values)

    model = get_rf()
    model.fit(X_train, y)
    write_submission(test_ids, model.predict_proba(X_test),
                     'submissions/feature_selection_rf100_84_82_6_nofg.csv')
def xgboost_solution():
    """
    Train an xgboost booster for 300 rounds on the raw feature columns of
    train.csv and write its predictions for test.csv as a submission file.
    """
    train = load_data('train.csv')
    test = load_data('test.csv')

    # replace the target labels by their encoded values
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target'] = le.transform(train['target'])

    feature_cols = [col for col in train.columns if col not in ['target', 'id']]
    X_train = train[feature_cols]
    X_test = test[feature_cols]
    y = train['target']
    test_ids = test['id']

    # the banner used to read 'rf_calibration_solution', a leftover from
    # the function this one was copied from
    print('xgboost_solution')

    dtrain = xgb.DMatrix(X_train, label=y)
    dtest = xgb.DMatrix(X_test, label=None)

    param = {'bst:max_depth': 10,
             'bst:min_child_weight': 4,
             'bst:subsample': 0.5,
             'bst:colsample_bytree': 0.8,
             'bst:eta': 0.05}
    other = {'silent': 1,
             'objective': 'multi:softprob',
             'num_class': 9,
             'nthread': 4,
             'eval_metric': 'mlogloss',
             'seed': 0}
    # progress is reported on the training set only
    watchlist = [(dtrain, 'train')]

    # ``param`` entries take precedence over ``other`` on a key clash
    full_param = other.copy()
    full_param.update(param)
    plst = full_param.items()

    bst = xgb.train(plst, dtrain, 300, watchlist)
    preds = bst.predict(dtest)
    write_submission(test_ids, preds, 'submissions/xgboost_solution.csv')
def xgboost_param_solution():
    """
    Fit an ``XGBoostClassifier`` with a fixed parameter set on the raw
    feature columns of train.csv and write its class probabilities for
    test.csv as a submission file.
    """
    model = XGBoostClassifier(
        alpha=0,
        booster='gbtree',
        colsample_bytree=0.459971793632,
        early_stopping_rounds=30,
        eta=0.0305648288294,
        eval_metric='mlogloss',
        gamma=0.0669039612464,
        l=0,
        lambda_bias=0,
        max_delta_step=4,
        max_depth=14,
        min_child_weight=8,
        nthread=4,
        ntree_limit=0,
        num_class=9,
        num_round=1000,
        objective='multi:softprob',
        seed=84425,
        silent=0,
        subsample=0.972607582489,
        use_buffer=True,
    )

    train_df = load_data('train.csv')
    test_df = load_data('test.csv')

    # replace the target labels by their encoded values
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_df['target'])
    train_df['target'] = encoder.transform(train_df['target'])

    excluded = ['target', 'id']
    feature_cols = [name for name in train_df.columns if name not in excluded]
    X_train = train_df[feature_cols]
    X_test = test_df[feature_cols]
    y = train_df['target']
    test_ids = test_df['id']

    model.fit(X_train, y)
    write_submission(test_ids, model.predict_proba(X_test),
                     'submissions/xgboost_param_solution_76.csv')
def prepare(self):
    """
    Read the file registered under self.files['dyn'] and keep the loaded
    data on the instance as self.dyn.
    """
    dyn_file = self.files['dyn']
    # dynamic info.
    self.dyn = tools.load_data(dyn_file)
    return
def main():
    """
    Run grid_search over the classifiers from get_clfs() on the split
    produced by split_data from 'train.csv'.
    """
    excluded = ['target', 'id']
    train = load_data('train.csv')

    feature_cols = []
    for name in train.columns:
        if name not in excluded:
            feature_cols.append(name)

    X_train, y_train = split_data(train, feature_cols)
    features = X_train[feature_cols]
    candidates = get_clfs()
    grid_search(features, y_train, candidates)
def collect_qm(self):
    """
    wrt down in one file

    Concatenates the individual QM result files into 'qm_results.dat' in
    the current directory: the 'qm_interface' file found under
    self.directory['root'], then 'qm_basis.dat', 'qm_energy.dat',
    'qm_gradient.dat' and, when it exists, 'qm_nac.dat'. If 'qm_nac.dat'
    is missing, a zero entry is written for every pair of states instead.

    The number of states comes from self.dim['n_state'] and the state
    index from 'interface.json' (['parm']['i_state']).

    All files are opened through context managers so that no handle is
    left open when a read or write fails part-way.
    """
    separator = '----------------------------------------- \n'

    def append_file(out, path):
        # copy the whole content of `path` into `out`, then a separator
        with open(path, 'r') as source:
            out.write(source.read())
        out.write(separator)

    n_state = self.dim['n_state']
    interface = tools.load_data("interface.json")
    index_state = interface['parm']['i_state']

    # wrt files.
    with open('qm_results.dat', 'w') as fileout3:
        # header
        fileout3.write(separator)
        fileout3.write('Summary of QM calculations: \n')
        fileout3.write(separator)

        qm_interface = self.directory['root'] + "/" + "qm_interface"
        append_file(fileout3, qm_interface)

        fileout3.write('The electronic calculations focus on '+str(n_state)+' states: \n')
        for i_state in range(n_state):
            fileout3.write('S'+str(i_state)+ ' .. ')
        fileout3.write('\n')

        # the label uses index_state-1
        fileout3.write('The S'+str(index_state-1)+' gradient should be computed ! \n')
        fileout3.write(separator)

        fileout3.write('Basis information: \n')
        append_file(fileout3, 'qm_basis.dat')

        fileout3.write('Energy of electronic states: \n')
        append_file(fileout3, 'qm_energy.dat')

        fileout3.write('Gradient on S'+str(index_state-1)+' \n')
        append_file(fileout3, 'qm_gradient.dat')

        fileout3.write('Nonadiabatic coupling elements \n')
        sourceFile = 'qm_nac.dat'
        if os.path.isfile(sourceFile):
            append_file(fileout3, sourceFile)
        else:
            # no coupling file: write a zero entry for every state pair
            for i_state in range(n_state):
                for j_state in range(n_state):
                    fileout3.write('S'+str(i_state)+' S'+str(j_state)+' 0.00000 \n')
            fileout3.write(separator)

    return
from activation_functions import sigmoid_function, tanh_function, linear_function,\
    LReLU_function, ReLU_function
from NeuralNet import NeuralNetwork
from tools import load_data
import numpy as np

# Load data from Hot_or_Not website scrape
male_images, male_scores, fem_images, fem_scores = load_data()

# Size of the second axis of the male image array, used below as the number
# of network inputs.
# NOTE(review): assumes each row of male_images is one flattened image - confirm.
image_length = male_images.shape[1]

# Configuration dictionary handed to NeuralNetwork.
settings = {
    # Preset Parameters
    "n_inputs" : image_length,          # Number of input signals
    "n_outputs" : 1,                    # Number of output signals from the network
    "n_hidden_layers" : 1,              # Number of hidden layers in the network (0 or 1 for now)
    "n_hiddens" : 100,                  # Number of nodes per hidden layer
    "activation_functions" : [ LReLU_function, sigmoid_function ], # Activation functions by layer

    # Optional parameters
    "weights_low" : -0.1,               # Lower bound on initial weight range
    "weights_high" : 0.1,               # Upper bound on initial weight range
    "save_trained_network" : False,     # Save trained weights or not.

    "batch_size" : 1,                   # 1 for stochastic gradient descent, 0 for gradient descent
}

# Initialization
network = NeuralNetwork( settings )
def prepare(self):
    """
    first, prepare work dir; then, the necessary files.

    Steps:
      1. Recreate the directory self.directory["nac"] from scratch.
      2. Copy the MO / CI files of the previous step (as *_1.dat) and of
         the current step (as *_2.dat), plus qm_results.dat, the dimension
         file and ao_overlap.dat, into that directory.
      3. chdir into it, load the dimension data and write the input deck
         'main_overlap_slater_input'.

    The input deck is written through a context manager so the handle is
    closed even when one of the writes fails.
    """
    work_dir = self.directory["nac"]
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    dimension_file = self.files["dimension"]

    # (source directory key, source file name, destination file name)
    transfers = (
        ("work_prev", "mo.dat", "mo_1.dat"),
        ("work_prev", "ci.dat", "ci_1.dat"),
        ("work", "mo.dat", "mo_2.dat"),
        ("work", "ci.dat", "ci_2.dat"),
        ("work", "qm_results.dat", "qm_results.dat"),
        ("work", dimension_file, dimension_file),
        ("overlap", "ao_overlap.dat", "ao_overlap.dat"),
    )
    for source_key, source_name, dest_name in transfers:
        sourceFile = self.directory[source_key] + "/" + source_name
        destFile = self.directory["nac"] + "/" + dest_name
        shutil.copy2(sourceFile, destFile)

    os.chdir(work_dir)

    # load internal data.
    dim = tools.load_data(dimension_file)

    n_atom = dim["n_atom"]      # Number of atom
    n_state = dim["n_state"]    # Number of states
    n_ao = dim["n_basis"]       # number of basis functions
    n_occ = dim["nocc_allA"]    # number of occupied orbitals

    # NOTE(review): the deck names "overlap.dat" while the file copied above
    # is "ao_overlap.dat", and the label "filename_input2" appears twice.
    # Left untouched because the consumer of this deck is not visible here.
    with open("main_overlap_slater_input", "w") as fileout1:
        fileout1.write(" read (*,*) \n")
        fileout1.write("" + str(n_atom) + " read (*,*) n_atom \n")
        fileout1.write("" + str(n_ao) + " read (*,*) n_ao \n")
        # the same occupied-orbital count is used for alpha and beta
        fileout1.write("" + str(n_occ) + " read (*,*) n_ele_alpha \n")
        fileout1.write("" + str(n_occ) + " read (*,*) n_ele_beta \n")
        fileout1.write(" read (*,*) \n")
        fileout1.write("" + str(n_state) + " read (*,*) n_state \n")
        fileout1.write(" read (*,*) \n")
        fileout1.write("1 read (*,*) type_input \n")
        fileout1.write("ci_1.dat read (*,*) filename_input1 \n")
        fileout1.write("ci_2.dat read (*,*) filename_input2 \n")
        fileout1.write("overlap.dat read (*,*) filename_input2 \n")
        fileout1.write(" read (*,*) \n")
        fileout1.write("0 read (*,*) output_level \n")
        fileout1.write("ci_overlap.dat read (*,*) filename_output \n")

    print("NAC PREPARED")

    return
# Echo the run parameters.
print "L=", L
print "N=", N
print "R=", R
print "noise=", noise

# Determine output numbers from albedo data file
# First pass over the albedo file: collect the integer after "=" on every
# comment line (starting with "#") that mentions "Output".
albedo_file = open(albedo_fname)
outputs = []
line = albedo_file.readline()
while line != "":
    if line.startswith("#") and "Output" in line:
        outputs.append(int(line.split("=")[1]))
    line = albedo_file.readline()
albedo_file.close()

its, temp, alb, veget = tools.load_data(run_fname)

# Second pass: reopen the albedo file together with the temperature and
# vegetation files and skip the first five lines of each.
# NOTE(review): presumably a fixed-size header - confirm against the writer.
albedo_file = open(albedo_fname)
temperature_file = open(temperature_fname)
vegetation_file = open(vegetation_fname)
for i in range(5):
    albedo_file.readline()
    temperature_file.readline()
    vegetation_file.readline()

# Vegetation custom 2-color colormap
veg_cmap = matplotlib.colors.ListedColormap(["white","darkgreen"])
print "Outputs found:", outputs
for out in outputs:
    albedo = []
def main():
    """
    Command-line driver for tuning, training and testing an SSGPR machine.

    Parses the options, builds a SparseSpectrumFeatures mapping and a
    LinearGPR on top of it, and then runs, in this order and only when the
    corresponding option is given: hyperparameter guess (--guess), explicit
    hyperparameters (--params), hyperparameter optimization (--optimize),
    training (--train), testing (--test) and serialization (--yarp).
    """
    print 'SSGPR Tuning'
    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage)
    generalgroup = optparse.OptionGroup(parser, 'General Options')
    datasetgroup = optparse.OptionGroup(parser, 'Dataset Options')
    optimizationgroup = optparse.OptionGroup(parser, 'Optimization Options')

    generalgroup.add_option('--projections', dest='projections', type='int', default=100, metavar='D',
                            help='number of spectral projections (default: 100)')
    # --nofixed stores False into options.fixed (default True)
    generalgroup.add_option('--nofixed', dest='fixed', action='store_false', default=True,
                            help='tune sparse spectrum frequencies')
    generalgroup.add_option('--params', dest='params', default=None,
                            metavar='[\sigma_n, \sigma_f, \ell_1, ..., \ell_n]',
                            help='set hyperparameters (takes precedence over --guess)')
    generalgroup.add_option('--seed', dest="seed", default=None, type='int',
                            help='seed for the PRNG (default: None)')
    generalgroup.add_option('--yarp', dest="yarp", default=False, action='store_true',
                            help='serialize to YARP learningMachine compatible files')

    # datasets for 4 different functions
    datasetgroup.add_option('-i', '--inputs', dest='inputs', default=None, metavar='IDX[,IDX]*',
                            help='input column indices')
    datasetgroup.add_option('-o', '--outputs', dest='outputs', default=None, metavar='IDX[,IDX]*',
                            help='output column indices')
    datasetgroup.add_option('--guess', dest='guess', default=None, metavar='DATASET',
                            help='guess hyperparameters using specified dataset')
    datasetgroup.add_option('--optimize', dest='optimize', default=None, metavar='DATASET',
                            help='optimize hyperparameters using specified dataset')
    datasetgroup.add_option('--train', dest='train', default=None, metavar='DATASET',
                            help='train machine using specified dataset')
    datasetgroup.add_option('--test', dest='test', default=None, metavar='DATASET',
                            help='test machine on specified dataset')

    # optimization options
    optimizationgroup.add_option('--solver', dest='solver', default='ralg',
                                 help='solver (default: ralg)')
    optimizationgroup.add_option('--verboseopt', dest='verboseopt', default=False, action='store_true',
                                 help='enable verbose optimization output')
    optimizationgroup.add_option('--ftol', dest='ftol', type='float', default=1e-4, metavar='TOL',
                                 help='function tolerance for stop condition (default: 1e-4)')
    optimizationgroup.add_option('--gtol', dest='gtol', type='float', default=1e-4, metavar='TOL',
                                 help='gradient tolerance for stop condition (default: 1e-4)')
    optimizationgroup.add_option('--maxtime', dest='maxtime', type='float', default=3600., metavar='SECONDS',
                                 help='maximum time (default: 3600)')
    optimizationgroup.add_option('--maxiters', dest='maxiters', type='int', default=2000, metavar='ITERS',
                                 help='maximum iterations (default: 2000)')
    optimizationgroup.add_option('--maxfevals', dest='maxfevals', type='int', default=5000, metavar='EVALS',
                                 help='maximum function evaluations (default: 5000)')

    parser.add_option_group(generalgroup)
    parser.add_option_group(datasetgroup)
    parser.add_option_group(optimizationgroup)

    (options, args) = parser.parse_args()

    numpy.random.seed(options.seed)

    # fall back to the first column as input and the last column as output
    # when tools.strtoidx returns a falsy value
    input_cols = tools.strtoidx(options.inputs) or [0]
    output_cols = tools.strtoidx(options.outputs) or [-1]

    n = len(input_cols)
    p = len(output_cols)

    # some arbitrary default parameters and no hyperpriors
    sigma_o, sigma_o_prior = 2., NoPrior()
    l, l_prior = [10.] * n, [NoPrior()] * n
    sigma_n, sigma_n_prior = 0.2, NoPrior()

    # construct machine and feature mapping
    ssf = SparseSpectrumFeatures(n, nproj = options.projections,
                                 sigma_o = sigma_o, sigma_o_prior = sigma_o_prior,
                                 l = l, l_prior = l_prior, fixed = options.fixed)
    ssgpr = LinearGPR(n, p, ssf, sigma_n = sigma_n, sigma_n_prior = sigma_n_prior)

    print 'General Info'
    print '%12s: %s -> %s' % ('columns', input_cols, output_cols)
    print '%12s: %d' % ('#proj', options.projections)
    print '%12s: %s' % ('fixed', options.fixed)
    print '%12s: (%d -> %d) -> %d' % ('dimensions', n, ssf.outputdim(), p)
    print '%12s: %s' % ('seed', options.seed)

    # rudimentary guess of hyperparameters based on data
    if options.guess:
        print '\nHyperparameter Guess: %s' % (options.guess)
        guessx, guessy = tools.load_data(options.guess, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + guessx.shape + guessy.shape)

        start = time.time()
        ssgpr.guessparams(guessx, guessy)
        end = time.time()
        print '%12s: %d seconds' % ('timing', end - start)

    # set hyperparameters if given
    # NOTE(review): eval() executes the --params string as Python code. That
    # is only acceptable while the command line is trusted; consider
    # ast.literal_eval if plain literals are all that is needed.
    if options.params is not None:
        ssgpr.setparams(list(eval(options.params)))

    # optimize hyperparameters
    if options.optimize:
        print '\nHyperparameter Optimization: %s' % (options.optimize)
        hyperx, hypery = tools.load_data(options.optimize, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + hyperx.shape + hypery.shape)
        print '%12s: %s' % ('solver', options.solver)
        print '%12s: % g' % ('ftol', options.ftol)
        print '%12s: % g' % ('gtol', options.gtol)
        print '%12s: % g seconds' % ('max time', options.maxtime)
        print '%12s: % d' % ('max fevals', options.maxfevals)
        print '%12s: % d' % ('max iters', options.maxiters)

        start = time.time()
        res = ssgpr.optimize(hyperx, hypery, solver = options.solver, verbose = options.verboseopt,
                             ftol = options.ftol, gtol = options.gtol, maxtime = options.maxtime,
                             maxIter = options.maxiters, maxFunEvals = options.maxfevals,
                             checkgrad = False)
        end = time.time()
        print '%12s: % g seconds' % ('timing', end - start)
        print '%12s: % g' % ('opt lml', res.ff)
        print '%12s: %s' % ('stop cond', res.msg)
        print '%12s: % d' % ('fevals', res.evals['f'])
        print '%12s: % d' % ('dfevals', res.evals['df'])
        print '%12s: % d' % ('iters', res.evals['iter'])

    # train ssgpr using dataset
    if options.train:
        print '\nTraining: %s' % (options.train)
        trainx, trainy = tools.load_data(options.train, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + trainx.shape + trainy.shape)

        start = time.time()
        ssgpr.train(trainx, trainy)
        end = time.time()
        print '%12s: % g seconds' % ('timing', end - start)
        lml = ssgpr.lmlfunc()
        print '%12s: % g' % ('lml', lml)

    # test ssgpr on dataset
    if options.test:
        print '\nTesting: %s' % (options.test)
        testx, testy = tools.load_data(options.test, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + testx.shape + testy.shape)

        start = time.time()
        # predict returns two values, unpacked here as testy_p and testy_pv
        testy_p, testy_pv = ssgpr.predict(testx)
        end = time.time()
        print '%12s: % g seconds' % ('timing', end - start)

        # squared error, squared error divided by the per-column variance of
        # testy, and a log-probability style term built from testy_pv
        se = (testy - testy_p)**2
        nse = se / testy.var(axis=0)
        lp = (se / testy_pv) + numpy.log(2. * numpy.pi) + numpy.log(testy_pv)

        print '%12s: %s' % ('mse', se.mean(axis=0))
        print '%12s: %s' % ('nmse', nse.mean(axis=0))
        print '%12s: %s' % ('nmlp', 0.5 * lp.mean(axis=0))

    # write the machine and the feature mapping to fixed file names
    if options.yarp:
        print '\nYarp LearningMachine Serialization'
        machine_fn = 'ssgpr.model'
        tools.serialize_machine(machine_fn, ssgpr)
        print '%12s: %s' % ('machine', machine_fn)

        preprocessor_fn = 'ssf.model'
        tools.serialize_preprocessor(preprocessor_fn, ssf)
        print '%12s: %s' % ('preproc', preprocessor_fn)
"""
from sklearn import ensemble
from tools import load_data
from sklearn.metrics import log_loss
from sklearn.calibration import CalibratedClassifierCV
from feature_engineering import feature_engineering
from sklearn import cross_validation
from tools import cross_v
import matplotlib.pyplot as plt
plt.style.use('ggplot')

def get_rf():
    """Return a new, unfitted RandomForestClassifier with 100 trees."""
    forest=ensemble.RandomForestClassifier(n_estimators=100)
    return forest

# Build the engineered feature matrix and the target vector from train.csv.
train=load_data('train.csv')
feature_cols = [col for col in train.columns if col not in ['id','target']]
X_train=feature_engineering(train[feature_cols]).values
y=train['target'].values

# Hold out 33% of the rows for scoring.
X_train, X_test, y_train, y_test=cross_validation.train_test_split(X_train,y,test_size=0.33,random_state=1)

# Stratified 10-fold split of the remaining training part, used as the cv
# argument of the calibrated classifier below.
skf = cross_validation.StratifiedKFold(y_train, n_folds=10, random_state=42)
calibration_method = 'isotonic'
clf=get_rf()
# The calibrated wrapper is constructed but currently not fitted: the two
# calls that would use it are commented out, so the score below is that of
# the plain forest.
ccv = CalibratedClassifierCV(base_estimator=clf, method=calibration_method, cv=skf)
#ccv.fit(X_train,y_train)
#pred=ccv.predict_proba(X_test)
clf.fit(X_train,y_train)
pred=clf.predict_proba(X_test)
# log loss of the predicted probabilities on the held-out part
score=log_loss(y_test,pred)
# Sweep over luminosity values starting at min_lumin. The statement that
# advances `lumin` is not part of this excerpt.
lumin = min_lumin
while lumin <= max_lumin + lumin_step:
    lumins.append(lumin)
    # NOTE(review): 5.670367e-8 matches the Stefan-Boltzmann constant and
    # 273.15 the Kelvin/Celsius offset; presumably this is an equilibrium
    # temperature in Celsius for an albedo of 0.5 - confirm.
    teq_temps.append((lumin*917.0*(1-0.5)/ 5.670367e-8)**0.25-273.15)
    plt.clf()
    power_runs_avg = []
    foo = []
    for run in range(1,runs_per_lumin+1):
        fname = "L%.2f_r%02d.dat" % (lumin, run)
        print "Reading %s ..." % fname
        its, temp, albedo, veget = tools.load_data(datadir + fname)
        # arithmetic mean of temp for this run
        avg_temp = sum(temp)/float(len(temp))
        foo.append(avg_temp)
        # compute and plot power spectrum
        freqs, power = tools.power_spectrum(temp, sampling=1.0)
        # drop the first entry of both arrays
        freqs = freqs[1:]
        power = power[1:]
        # Accumulate the element-wise sum of the spectra over the runs.
        # The first run's `power` object itself becomes the accumulator, so
        # later additions modify that same object in place.
        if len(power_runs_avg) == 0:
            power_runs_avg = power
        else:
            for i in range(len(power_runs_avg)):
                power_runs_avg[i] += power[i]
        plt.loglog(freqs, power, color="blue", alpha=0.25)
def get_dim_info(self):
    """
    obtain dimension data. such as number of atoms and et al.
    parser gamess-us log file.

    Fills self.dim with:
      n_state, i_state        from the interface file, overridden by the
                              TDDFT section of the log when it is present
      n_basis, n_elec,
      neleA, neleB, n_atom    from the summary block of the log
      noccA, nvirA,
      nocc_allA, nvir_allA    derived from the values above
    and finally dumps self.dim to 'dimension.json'.

    The log file is read inside a context manager so the handle is closed
    even when a line cannot be parsed (IndexError / ValueError).
    """
    def int_after_equals(text):
        # integer value following the first "=" of a log line
        return int(text.split("=")[1])

    # default setting
    myobj = tools.load_data(self.files['interface'])
    self.dim['n_state'] = myobj['parm']['n_state']
    self.dim['i_state'] = myobj['parm']['i_state']

    # read
    logfile = self.files['log']
    with open(logfile, "r") as fp:
        line = "STARTER"
        pat = re.compile("TOTAL NUMBER OF BASIS SET SHELLS")
        while line != "":
            line = fp.readline()
            m = pat.search(line)
            if m is not None:
                break

        # shell num.: not stored, but still parsed so that a missing or
        # malformed header fails here exactly as before
        int_after_equals(line)

        # READ THE FOLLOWING LINES
        # 9 lines
        # TOTAL NUMBER OF BASIS SET SHELLS = 10
        # NUMBER OF CARTESIAN GAUSSIAN BASIS FUNCTIONS = 38
        # NUMBER OF ELECTRONS = 14
        # CHARGE OF MOLECULE = 0
        # SPIN MULTIPLICITY = 1
        # NUMBER OF OCCUPIED ORBITALS (ALPHA) = 7
        # NUMBER OF OCCUPIED ORBITALS (BETA ) = 7
        # TOTAL NUMBER OF ATOMS = 2
        # THE NUCLEAR REPULSION ENERGY IS 22.5117346394
        #
        # The lines are consumed strictly in this order.

        # number of cart gaussian basis functions
        self.dim['n_basis'] = int_after_equals(fp.readline())
        # number of electrons
        self.dim['n_elec'] = int_after_equals(fp.readline())
        # mol. charge (read to advance and validate, value not kept)
        int_after_equals(fp.readline())
        # spin-mult (read to advance and validate, value not kept)
        int_after_equals(fp.readline())
        # number-occupied-orbitals-alpha
        self.dim['neleA'] = int_after_equals(fp.readline())
        # number-occupied-orbitals-beta
        self.dim['neleB'] = int_after_equals(fp.readline())
        # number-of-atoms
        self.dim['n_atom'] = int_after_equals(fp.readline())

        # other
        self.dim['noccA'] = self.dim['neleA']
        self.dim['nvirA'] = self.dim['n_basis'] - self.dim['neleA']
        self.dim['nvir_allA'] = self.dim['nvirA']
        self.dim['nocc_allA'] = self.dim['noccA']

        # TDDFT INPUT PARAMETERS
        pat = re.compile("TDDFT INPUT PARAMETERS")
        line = "starter"
        while line != "":
            line = fp.readline()
            m = pat.search(line)
            if m is not None:
                break

        # skip one line after the section title
        line = fp.readline()
        # reading...
        # NSTATE= 3 IROOT= 1 MULT= 1
        t_line = fp.readline()
        pat0 = re.compile("NSTATE=(.*)IROOT=(.*)MULT=(.*)")
        m = pat0.search(t_line)
        if m is not None:
            self.dim['n_state'] = int(m.group(1)) + 1  # because of the ground state.
            self.dim['i_state'] = int(m.group(2))
        else:
            # keep the defaults taken from the interface file
            print("<^WARNING> CANNOT FIND TD-DFT INPUT PARAMETERS SETTING: [suppose it to be ground state]")

    tools.dump_data('dimension.json', self.dim)

    return
def main():
    """
    Run grid_search over the classifiers from get_clfs() using the
    engineered features of 'train.csv' and its 'target' column.
    """
    non_features = ['target', 'id']
    train = load_data('train.csv')

    feature_cols = []
    for name in train.columns:
        if name not in non_features:
            feature_cols.append(name)

    X_train = feature_engineering(train[feature_cols])
    y = train['target']
    candidates = get_clfs()
    grid_search(X_train, y, candidates)