Example #1
    def modify_dimer(self):
        """
        setup ao overlap calc.
        """
        # load interface data from previous and current step
        filename = self.files
        i_pre = tools.load_data(filename['previous'])
        i_cur = tools.load_data(filename['current'])
        t = copy.deepcopy(self.template_cmp)
        # only the high model is required to calc. ao
        # modify charge & spin (kept)
        charge = []
        for c in t['charge']:
            charge.append(c * 2)
        t['charge'] = charge
        # molecular spec.
        mol = self.__merge_mols([i_cur['mol'], i_pre['mol']])
        t['mol'] = mol

        # routine
        routine = t['routine']
        theory = "HF"
        basis = routine['basis']
        model = theory + "/" + basis
        other = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
        t['routine']['content'] = "#" + model + " " + other
        # delete connect
        if 'connect' in t.keys():
            del t['connect']

        # recover template
        self.template = t

        return
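Most examples on this page hand tools.load_data a .json (or similar) path and treat the result as nested dicts and lists. The helper itself is not shown here; a minimal sketch consistent with that usage, assuming plain JSON deserialization:

import json

def load_data(filename):
    # Assumed behavior: parse a JSON file into Python dicts/lists,
    # matching how the callers on this page index the result by string keys.
    with open(filename) as fp:
        return json.load(fp)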
Example #2
    def modify_dimer(self):
        """
        setup ao overlap calc.
        """
        # load interface data from previous and current step
        filename = self.files
        i_pre = tools.load_data(filename['previous'])
        i_cur = tools.load_data(filename['current'])
        t = copy.deepcopy(self.template_cmp)

        # modify charge & spin (kept)
        t['charge'] = t['charge'] * 2
        # molecular spec.
        mol = self.__merge_mols([i_cur['mol'], i_pre['mol']])
        t['mol'] = mol

        # routine
        routine = t['routine']
        theory = "HF"
        basis = routine['basis']
        model = theory + "/" + basis
        other = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
        t['routine']['content'] = "#" + model + " " + other

        # recover template
        self.template = t

        return
Example #3
    def modify(self):
        """
        setup ao overlap calc.
        """
        print "AO the Working Directory is:\n", os.getcwd()

        # load interface data from previous and current step
        i_pre = tools.load_data(self.files['previous'])
        i_cur = tools.load_data(self.files['current'])
        t = copy.deepcopy(self.template_cmp['high-model']['template'])
        # only the high model is required to calc. ao
        # modify charge & spin (kept)
        t['charge'] *= 2
        # molecular spec.
        t['mol'] = self.merge_mol([i_cur['mol'], i_pre['mol']], t['region'])

        # routine
        routine = t['routine']
        theory = "HF"
        basis = routine['basis']
        print basis
        model = theory + "/" + basis
        other = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
        t['routine']['content'] = "# " + model + " " + other
        # delete connect
        if 'connect' in t.keys():
            del t['connect']
        # recover template
        self.template['high-model']['template'] = t

        return
Example #4
    def modify_dimer(self):
        """
        setup ao overlap calc.
        """
        # load interface data from previous and current step
        filename = self.files
        i_pre = tools.load_data(filename['previous'])
        i_cur = tools.load_data(filename['current'])        
        t = copy.deepcopy(self.template_cmp)        
        
        # modify charge & spin (kept)
        t['charge'] = t['charge']*2
        # molecular spec.
        mol = self.__merge_mols([i_cur['mol'], i_pre['mol']])        
        t['mol'] = mol

        # routine
        routine = t['routine']
        theory = "HF"
        basis = routine['basis']
        model = theory + "/" + basis
        other = "nosymm iop(2/12=3,3/33=1) guess=only pop=full"
        t['routine']['content'] = "#" + model + " " + other
 
        # recover template
        self.template = t
        
        return
Example #5
def periodical_load(t1, interval):
    # t1 = str(get_ts() - interval)
    t2 = str(get_ts())
    load_data(t2)
    periodical_load.cnt += 1
    if periodical_load.cnt > 1:
        path = os.path.join('Dataset', 'dataset_%s-%s.arff' % (t1, t2))
        generate_training_file(path, t1, t2)
    Timer(interval, periodical_load, [t2, interval]).start()
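Note that periodical_load stores its call count as a function attribute, so the attribute must be set before the first call or the += 1 raises AttributeError. A usage sketch, assuming get_ts and Timer (presumably threading.Timer) come from the surrounding module:

# Initialize the counter, then start the self-rescheduling cycle.
periodical_load.cnt = 0
periodical_load(str(get_ts()), 300)  # re-runs itself every 300 s via Timer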
Example #6
 def load(self):
     """
     load template.json and interface.json
     """
     filename1 = self.files['template']
     filename2 = self.files['interface']
     obj_1 = tools.load_data(filename1)
     obj_2 = tools.load_data(filename2)
     self.template = copy.deepcopy(obj_1)
     self.interface = copy.deepcopy(obj_2)
Example #7
def benchmark_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    lbl_enc = preprocessing.LabelEncoder()
    train['target'] = lbl_enc.fit_transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=train[feature_cols]
    y=train['target']
    X_test=test[feature_cols]
    test_ids=test['id']
    print "benchmark solution"
    cross_v(get_rf(),X_train.values,y.values)#0.596256539386
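The Kaggle-style snippets (this one and several below) instead call a load_data that takes a CSV path and returns an object indexed by column names such as 'target' and 'id', which matches a pandas DataFrame. A plausible sketch, assuming pandas:

import pandas as pd

def load_data(path):
    # Assumed behavior: read the CSV into a DataFrame.
    return pd.read_csv(path)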
Example #8
 def prepare(self):
     """ load configure file """
     # dynamic info.
     self.dyn  = tools.load_data(self.files['dyn'])
     # gaussian directory structure info.
     #script_dir = os.path.split(os.path.realpath(sys.argv[0]))[0]
     script_dir = os.path.split(os.path.realpath(__file__))[0]
     self.config = tools.load_data(script_dir + "/config.in")
     self.config['root'] = os.getcwd()
     # attach dyn info in config vars
     self.config.update(self.dyn['quantum'])
     return
Example #9
    def prepare(self):
        """ load configure file """
        # dynamic info.
        self.dyn = tools.load_data(self.files['dyn'])

        # molpro directory structure info.
        script_dir = os.path.split(os.path.realpath(__file__))[0]
        self.config = tools.load_data(script_dir + "/config.in")
        self.config['root'] = os.getcwd()
        self.config.update(self.dyn['quantum'])

        return
Example #10
 def __init__(self, pop_size, elite_size, K, upper, lower, seconds):
     self.train_data, self.train_targets = load_data("cwk_train")
     self.test_data, self.test_targets = load_data("cwk_test")
     self.train_targets = list(self.train_targets)
     self.test_targets = list(self.test_targets)
     self.pop_size = pop_size
     self.elite_size = elite_size
     self.upper = upper
     self.lower = lower
     self.seconds = seconds
     self.K = K
     self.population = []
Example #11
    def prepare(self):
        """ load configure file """
        # dynamic info.
        self.dyn  = tools.load_data(self.files['dyn'])
        # gaussian directory structure info.
        #script_dir = os.path.split(os.path.realpath(sys.argv[0]))[0]
        script_dir = os.path.split(os.path.realpath(__file__))[0]
        self.config = tools.load_data(script_dir + "/config.in")
        self.config['root'] = os.getcwd()
        # attach dyn info in config vars
        self.config.update(self.dyn['quantum'])
 
        return
Example #12
 def __init__(self, inertia, cognitive, social, num_particles):
     self.train_data, self.train_targets = load_data("cwk_train")
     self.train_targets = list(self.train_targets)
     self.test_data, self.test_targets = load_data("cwk_test")
     self.test_targets = list(self.test_targets)
     self.num_particles = num_particles
     self.inertia = inertia
     self.cognitive = cognitive
     self.social = social
     # initialise population
     self.particles = []
     for i in range(self.num_particles):
         self.particles.append(Particle())
Example #13
def loadData(datatype):

    if datatype == 'gtsrb_binary':
        x_train, x_test, y_train, y_test = load_data('gtsrb_binary', 2)
        input_shape = 3 * 48 * 48
    elif datatype == 'cifar10_binary':
        x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)
        input_shape = 3 * 32 * 32
    elif datatype == 'cifar10':
        x_train, x_test, y_train, y_test = load_data('cifar10', 2)
        input_shape = 3 * 32 * 32
    else:
        raise ValueError('unknown datatype: %s' % datatype)

    return x_train, x_test, y_train, y_test, input_shape
Example #14
def loadData(datatype):

    if datatype == 'gtsrb_binary':
        x_train, x_test, y_train, y_test = load_data('gtsrb_binary', 2)
        x_train = x_train.reshape((-1, 3, 48, 48))
        x_test = x_test.reshape((-1, 3, 48, 48))
        input_shape = 3*48*48
    elif datatype == 'cifar10_binary':
        x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)
        x_train = x_train.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        x_test = x_test.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        input_shape = 3*32*32
    else:
        raise ValueError('unknown datatype: %s' % datatype)

    return x_train, x_test, y_train, y_test, input_shape
Example #15
def main_train(key, noise, divide_num=5):
    """在指定噪声率下,训练,并返回其精度、标准差"""

    temp_acc = []
    print(key, "数据集 ", "噪声率为:", noise, sep="")
    for i in range(divide_num):
        train, test = load_data(key, 0.30, noise)

        acc0 = 0

        acc1 = 0  # placeholder while tuning the RD threshold

        clean_train = relative_density(train)  # RD
        acc2 = kernel_LR(clean_train, test)

        temp_acc.append([acc0, acc1, acc2])
        print("klr:", acc0, "crf:", acc1, "rd:", acc2)

    mean_acc = np.mean(temp_acc, axis=0)
    round_acc = np.round(mean_acc, 4)

    std_acc = np.std(temp_acc, axis=0)
    round_accstd = np.round(std_acc, 4)

    print("KLR:%.4lf" % (round_acc[0]), "CRF:%.4lf" % (round_acc[1]),
          "RD:%.4lf" % (round_acc[2]), "KLR:%.4lf" % (round_accstd[0]),
          "CRF:%.4lf" % (round_accstd[1]), "RD:%.4lf" % (round_accstd[2]))
    print()

    return mean_acc, round_acc, round_accstd
Example #16
def main_train(key, noise, divide_num=5):
    """在指定噪声率下,训练,并返回其精度、标准差"""

    temp_acc = []
    print(key, "数据集 ", "噪声率为:", noise, sep="")
    for i in range(divide_num):
        train, test = load_data(key, 0.30, noise)

        acc0 = 0

        acc1 = CRFNFL_SVM(train, test)  # call random forest

        acc2 = 0

        temp_acc.append([acc0, acc1, acc2])
        print("ksvm:", acc0, "crf:", acc1, "rd:", acc2)

    mean_acc = np.mean(temp_acc, axis=0)
    round_acc = np.round(mean_acc, 4)

    std_acc = np.std(temp_acc, axis=0)
    round_accstd = np.round(std_acc, 4)

    print("KSVM:%.4lf" % (round_acc[0]), "CRF:%.4lf" % (round_acc[1]),
          "RD:%.4lf" % (round_acc[2]), "KSVM:%.4lf" % (round_accstd[0]),
          "CRF:%.4lf" % (round_accstd[1]), "RD:%.4lf" % (round_accstd[2]))
    print()

    return mean_acc, round_acc, round_accstd
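A hypothetical call for main_train; the dataset key below is a placeholder, not one known from this page:

# Hypothetical usage: five random splits at a 20% label-noise rate.
mean_acc, round_acc, round_accstd = main_train("heart", 0.20, divide_num=5)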
Example #17
 def load(self, filename = "template.json"):
     """
     load template.json
     """
     obj = tools.load_data(filename)
     
     return obj
Example #18
    def run(self):
        """
        raise the calc. code.
        """
        # load interface file
        interface = tools.load_data(self.files['interface'])
        it = int(interface['parm']['i_time'])
        label_ZN = int(self.dyn['control']['label_ZN'])

        qm_method = int(self.dyn['control']['qm_method'])

        config = self.config

        # Start the QC calculations

        if qm_method == 2 or qm_method == 3:

            molpro_template(config)
            molpro_run(label_ZN, config)

        else:
            print "QM method : error: no such method", qm_method
            sys.exit(1)

        return
Example #19
    def run(self):
        """
        raise the calc. code.
        """
        # load interface file
        interface = tools.load_data(self.files['interface'])
        it = int(interface['parm']['i_time'])

        qm_method = int(self.dyn['control']['qm_method'])

        config = self.config
        # Start the QC calculations  (CIS, TDHF, TDDFT)
        if qm_method == 11:
            # Do electronic structure calculation at time zero
            if it == 0:
                # call zero time
                # % make template: dump gjf in json format
                gau_template(config)
                # % call gaussian
                gau_zero(config)

            elif it > 0:
                gau_nonzero(config)
                print "now work dir:", os.getcwd()
                gau_overlap(config)
                nac = gau_nac(config)
            else:
                print "Error: keyword 'it':", it
                sys.exit(0)
        else:
            print "QM method : error: no such method", qm_method
            sys.exit(1)

        return
Example #20
    def load(self, filename="template.json"):
        """
        load template.json
        """
        obj = tools.load_data(filename)

        return obj
Example #21
    def run(self):
        """
        raise the calc. code.
        """
        # load interface file
        interface = tools.load_data(self.files['interface'])
        it = int(interface['parm']['i_time'])
 
        qm_method = int(self.dyn['control']['qm_method'])
    
        config = self.config
        # Start the QC calculations  (CIS, TDHF, TDDFT)
        if qm_method == 11:
        # Do electronic structure calculation at time zero
            if it == 0:
                # call zero time     
                # % make template: dump gjf in json format
                gms_template(config)                 
                # % call gaussian
                gms_zero(config)
                
            elif it > 0:
                gms_nonzero(config)
                print "now work dir:", os.getcwd()
                gms_overlap(config)
                gms_nac(config)        
            else:
                print "Error: keyword 'it':", it
                sys.exit(0)                
        else:
            print "QM method : error: no such method" , qm_method  
            sys.exit(1)
            
        return
Example #22
def main():
    print("Analysis of the US Congress members from 1947 to 2014")
    print()
    data = tools.load_data()

    print("The 5 oldest members of Congress at the beginning of "
          "their mandate")
    for i, member in enumerate(tools.find_oldest(data)[:5]):
        print(f'{i + 1}. {member.firstname} {member.lastname} who started in '
              f'{member.termstart} at {member.age}')

    print()
    print("The 5 youngest members of Congress at the beginning of "
          "their mandate")
    for i, member in enumerate(tools.find_youngest(data)[:5]):
        print(f'{i + 1}. {member.firstname} {member.lastname} who started in '
              f'{member.termstart} at {member.age}')

    print()
    print("The 5 oldest members of Congress at the beginning of "
          "their mandate who are Rep.")
    for i, member in enumerate(tools.find_oldest_rep(data)[:5]):
        print(f'{i + 1}. {member.firstname} {member.lastname} who started in '
              f'{member.termstart} at {member.age}')

    print()
    print("The 5 youngest members of Congress at the beginning of "
          "their mandate who are Dem.")
    for i, member in enumerate(tools.find_youngest_dem(data)[:5]):
        print(f'{i + 1}. {member.firstname} {member.lastname} who started in '
              f'{member.termstart} at {member.age}')
Example #23
    def run(self):
        """
        raise the calc. code.
        """
        # load interface file
        interface = tools.load_data(self.files['interface'])
        it = int(interface['parm']['i_time'])
        label_ZN = int(self.dyn['control']['label_ZN'])

        qm_method = int(self.dyn['control']['qm_method'])

        config = self.config
        # Start the QC calculations  (MRCI)
        if qm_method == 1:
            # % make template: dump input in json format
            mndo_template(config)
            # % call mndo
            print "now work dir:", os.getcwd()

            mndo_run(label_ZN, config)

        else:
            print "QM method : error: no such method", qm_method
            sys.exit(1)

        return
Example #24
def plot_cor(data):
    """Plot pairwise correlations of features in the given dataset"""
    from matplotlib import cm

    cols = data.columns.tolist()
    fig = plt.figure(figsize=(12,12))
    ax = fig.add_subplot(111)
    
    # Plot absolute value of pairwise correlations since we don't
    # particularly care about the direction of the relationship,
    # just the strength of it
    cax = ax.matshow(data.corr().abs(), cmap=cm.YlOrRd)
    
    fig.colorbar(cax)
    ax.set_xticks(np.arange(len(cols)))
    ax.set_yticks(np.arange(len(cols)))
    ax.set_xticklabels(cols)
    ax.set_yticklabels(cols)
    ax.set_title("Correlation Matrix of Features")

def main():
    train=load_data('train.csv')
    cols = [col for col in train.columns if col not in ['id','target']] 
    features=['feat_34','feat_11','feat_40','feat_26','feat_60','feat_25','feat_86','feat_15','feat_90','feat_14','feat_42','feat_67','feat_62','feat_36','feat_24','target']
    #xCol = 'target'
    #for col in cols:
    #    plotHist(train, col,xCol)
    x_data=train[features]
    #plot_cor(x_data)
    y=train['target']
    class1=y[y=='class1']
Example #25
 def load(self):
     """
     load interface.json
     """
     filename = self.files['interface']
     obj = tools.load_data(filename)
     self.interface = copy.deepcopy(obj)
Example #26
    def run(self):
        """
        raise the calc. code.
        """
        # load interface file
        interface = tools.load_data(self.files['interface'])
        it = int(interface['parm']['i_time'])
 
        qm_method = int(self.dyn['quantum']['qm_method'])
        print "QM_METHOD: ", qm_method
        
        config = self.config
        # 
        # make template
        Template(self.config)
        # Start the QC calculations
        if it == 0:
            firstStep(config)
            buildSOC(config)
            buildNAC(config)
        elif it > 0:
            nextStep(config)
            buildSOC(config)
            buildNAC(config)
        else:
            print "Error: keyword 'it':", it
            sys.exit(0)                
        
        return
Example #27
def feature_engineering_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=feature_engineering(train[feature_cols])
    X_test=feature_engineering(test[feature_cols])
    feature_cols= [col for col in X_train.columns]#std 0.607958003167 mean 0.615741311533
    X_train=X_train[feature_cols]
    X_test=X_test[feature_cols]
    y=train['target']
    test_ids=test['id']
    print 'feature_engineering_solution'
    cross_v(get_rf(),X_train.values,y.values)#0.600017926514
Example #28
 def load(self):
     """ load template.json """
     filename = self.files['template']
     obj = tools.load_data(filename)
     self.template = copy.deepcopy(obj)
     self.template_cmp = copy.deepcopy(obj)
     return obj
Example #29
def main():
    """ Main routine of PINK data preconditioning """

    parser = argparse.ArgumentParser(description='PINK data preconditioning')
    parser.add_argument('data',
                        help='Data input file (.npy or .bin)',
                        action=tools.check_extension({'npy', 'bin'}))
    parser.add_argument('-o', '--output', help='Data output file')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Be talkative')
    parser.add_argument(
        '-s',
        '--scale',
        action='store_true',
        help='Scale the input data to be within the range [0, 1]')
    args = parser.parse_args()

    if os.path.splitext(args.data)[1][1:] == "npy":
        data = np.load(args.data).astype(np.float32)
    elif os.path.splitext(args.data)[1][1:] == "bin":
        data = tools.load_data(args.data)

    print('shape:             ', np.shape(data))
    print('size:              ', data.size)
    print('min value:         ', np.amin(data))
    print('max value:         ', np.amax(data))
    print('non-zero elements: ', np.count_nonzero(data))
    print('sparsity:          ', np.count_nonzero(data) / data.size)

    if args.scale:

        print('Data will be linearly scaled to be within the range [0.0, 1.0]')

        min_element = np.amin(data)
        max_element = np.amax(data)
        factor = 1 / (max_element - min_element)

        print('min value: ', min_element)
        print('max value: ', max_element)
        print('factor: ', factor)

        data = (data - min_element) * factor

        print('min value: ', np.amin(data))
        print('max value: ', np.amax(data))

    if args.output:
        print('Output file written at', args.output)
        if os.path.splitext(args.output)[1][1:] == "npy":
            np.save(args.output, data)
        elif os.path.splitext(args.output)[1][1:] == "bin":
            tools.save_data(args.output, data)
        else:
            raise RuntimeError('Unsupported output file extension: ',
                               os.path.splitext(args.output)[1][1:])

    print('All done.')
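The --scale branch maps the data linearly onto [0, 1]; the same transformation applied to a toy array, for reference:

import numpy as np

data = np.array([2.0, 5.0, 8.0], dtype=np.float32)
factor = 1 / (np.amax(data) - np.amin(data))
scaled = (data - np.amin(data)) * factor
print(scaled)  # [0.  0.5 1. ]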
Example #30
def model_selection_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=train[feature_cols]
    X_test=test[feature_cols]
    y=train['target']
    test_ids=test['id']
    print 'gbrt_tuned_selection_solution'
    #cross_v(get_tuned_gb(),X_train.values,y.values)
    clf=get_tuned_gb()
    clf.fit(X_train,y)
    preds = clf.predict_proba(X_test)
    write_submission(test_ids,preds,'submissions/gbrt_tuned_selection_solution.csv')
Example #31
def do_train():
    train_data = load_data('./data/train.txt')
    val_data = load_data('./data/val.txt')

    net = Net()
    model = net.get_model()

    evaluator = Evaluator(val_data, config.model_save_to, model, net.CRF,
                          net.NER)
    train_generator = data_generator(train_data, config.batch_size)

    history = model.fit_generator(train_generator.forfit(),
                                  steps_per_epoch=len(train_generator),
                                  epochs=config.epochs,
                                  callbacks=[evaluator])

    return history
Example #32
def parameter_tuning_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=train[feature_cols]
    X_test=test[feature_cols]
    y=train['target']
    test_ids=test['id']
    print 'parameter_tuning_solution800_6'
    cross_v(get_tuned_rf(),X_train.values,y.values)#0.546637992781
    clf=get_tuned_rf()
    clf.fit(X_train,y)
    preds = clf.predict_proba(X_test)
    write_submission(test_ids,preds,'submissions/parameter_tuning_solution800_6.csv')
Example #33
def attack():
    x_train, x_test, y_train, y_test = load_data('cifar10', 2)
    min_pixel_value = x_train.min()
    max_pixel_value = x_train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)

    s = time.time()

    # model = BNN(['../binary/checkpoints/cifar10_mlpbnn_approx_%d.h5' % (i) for i in range(100)])
    model = BNN([
        '../binary/checkpoints/cifar10_mlpbnn_approx_ep004_%d.h5' % (i)
        for i in range(100)
    ])

    pred_y = model.predict(x_test)
    print('pred_y: ', pred_y)
    np.savetxt('pred_y', pred_y)
    np.savetxt('y_test', y_test)
    print('pred_y[0], pred_y[288], pred_y[888], pred_y[1990], y_test[-1]',
          pred_y[0], pred_y[288], pred_y[888], pred_y[1990], y_test[-1])
    print('Accuracy: ', accuracy_score(y_true=y_test, y_pred=pred_y))

    # Create a model wrapper
    predictWrapper = modelWrapper(model)

    classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot,
                                    input_shape=(32 * 32 * 3, ),
                                    nb_classes=2,
                                    clip_values=(min_pixel_value,
                                                 max_pixel_value))

    print('----- generate adv data -----')
    attack = BoundaryAttack(estimator=classifier,
                            targeted=False,
                            delta=0.01,
                            epsilon=0.01,
                            max_iter=100,
                            num_trial=100,
                            sample_size=100,
                            init_size=100)

    print('----- generate adv test data -----')
    x_test = x_test[288]
    # Input data shape should be 2D
    x_test = x_test.reshape((-1, 32 * 32 * 3))
    x_test_adv = attack.generate(x=x_test)

    print('x_test ', x_test)
    print('x_test_adv ', x_test_adv)

    dist2 = utils.computeDist2(x_test, x_test_adv)
    print('test data dist2: ', dist2)

    distInf = utils.computeDistInf(x_test, x_test_adv)
    print('test data distInf: ', distInf)

    print('Cost time: ', time.time() - s)
Example #34
def get_standard_template(file_string, key_frame, action_type, action_num):
    # key_frame is a list storing the last frame and the first frame of two adjacent sub-actions
    data = load_data(file_string)
    ang_data = trans_data(data)
    f = open(action_type + '.txt', 'w')
    for i in range(len(key_frame) // 2):
        for j in range(len(ang_data)):
            cur_W = curve_fit(ang_data[j][key_frame[2 * i] -
                                          action_num:key_frame[2 * i]])
Example #35
 def load_coord(self):
     """
     load two set of coord. of the system
     """
     oldsetfile = self.files['compare']
     
     self.vars['geom'] = tools.load_data(oldsetfile)
     
     return
Example #36
 def prepare(self):
     """
     first, prepare work dir; then, the necessary files.
     """
     # load internal data.
     filename = self.files['interface']
     it = tools.load_data(filename)
     self.dim = it['parm']
     return
Example #37
 def load(self):
     """
     load template.json
     """
     filename = self.files['template']
     obj = tools.load_data(filename)
     self.template = copy.deepcopy(obj)
     self.template_cmp = copy.deepcopy(obj)
     
     return obj
Example #38
def rf_calibration_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=train[feature_cols]
    X_test=test[feature_cols]
    y=train['target']
    test_ids=test['id']
    print 'rf_calibration_solution'
    skf = cross_validation.StratifiedKFold(y, n_folds=5, random_state=42)
    calibration_method = 'isotonic'
    clf=get_tuned_rf()
    ccv = CalibratedClassifierCV(base_estimator=clf, method=calibration_method, cv=skf)
    ccv.fit(X_train,y)
    preds = ccv.predict_proba(X_test)
    write_submission(test_ids,preds,'submissions/rf_calibration_solution.csv')
Example #39
    def modify_td(self):
        """
        tddft input, nstates & root would be updated.
        for oniom feature
        """
        # current interface file data.
        i_cur = tools.load_data(self.files['interface'])

        t = copy.deepcopy(self.template_cmp)

        # %charge & spin was kept. none was required.
        # %molecular spec.
        t['mol'] = i_cur['mol']
        # %routine
        # in dyn. interface, gs was 1, the first-es is 2, et al.
        # so there are n_es + 1 states
        # but gaussian, root=1 is first es.
        # so, x - 1 is ok
        n_state = int(i_cur['parm']['n_state']) - 1
        i_state = int(i_cur['parm']['i_state']) - 1
        routine = t['routine']
        content = routine['content']
        pat = re.compile("nstates=(\d+)", re.IGNORECASE)
        content = re.sub(pat, "nstates=" + str(n_state), content)
        pat = re.compile("root=(\d+)", re.IGNORECASE)
        content = re.sub(pat, "root=" + str(i_state), content)

        # calc force routine of gs if required
        pat = re.compile("(td\(.+?\))|(tda\(.+?\))|(cis\(.+?\))",
                         re.IGNORECASE)
        force_content = re.sub(pat, "", content)
        # gs single point calc.
        pat = re.compile("force", re.IGNORECASE)
        sp_content = re.sub(pat, "", force_content)
        # new content routine for es
        pat = re.compile("force", re.IGNORECASE)
        es_content = re.sub(pat, " ", content)

        # assume the first occurrence of */* is in b3lyp/6-31G* style
        pat = re.compile("\/[\S]+")
        es_content = re.sub(pat, "/ChkBasis", es_content, count=1)
        pat = re.compile("\/[\S]+")
        force_content = re.sub(pat, "/ChkBasis", force_content, count=1)
        # assign value
        t['routine']['content'] = sp_content
        t['routine']['es_content'] = content + " geom=AllCheck Guess=Read "
        if i_state == 0:
            t['routine'][
                'force_content'] = force_content + " geom=AllCheck Guess=Read "
            t['routine'][
                'es_content'] = es_content + " geom=AllCheck Guess=Read "
        # recover template
        self.template = t

        return
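The route-line rewriting above can be exercised in isolation; a standalone run of the same substitutions on a made-up Gaussian route string:

import re

content = "# B3LYP/6-31G* td(nstates=5,root=2) force nosymm"  # made-up route line
content = re.sub(re.compile(r"nstates=(\d+)", re.IGNORECASE), "nstates=4", content)
content = re.sub(re.compile(r"root=(\d+)", re.IGNORECASE), "root=1", content)
print(content)  # '# B3LYP/6-31G* td(nstates=4,root=1) force nosymm'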
Example #40
    def modify_dimer(self):
        """
        setup ao overlap calc.
        """
        # load interface data from previous and current step
        filename = self.files
        i_pre = tools.load_data(filename['previous'])
        i_cur = tools.load_data(filename['current'])
        t = copy.deepcopy(self.template_cmp)
        
        # $data section in gms.
        # molecular spec.
        mol = self.__merge_mols([i_cur['mol'], i_pre['mol']])
        t['@DATA']['mol'] = mol
        t['@DATA']['title'] = "ONLY CHECK CALC, IGNORE WARINING.."
        # re-build $data section.
                
        # modify charge & spin (kept)
        #
        at_contrl = t['@CONTRL']
        if 'MULT' in at_contrl:
            mol_spin = int(at_contrl['MULT'])
        else:
            mol_spin = 1

        if "ICHARG" in at_contrl:
            mol_chrg = int(at_contrl['ICHARG']) * 2
        else:
            mol_chrg = 0

        t['CONTRL'] = " $contrl scftyp=rhf runtyp=energy exetyp=check\n \
        NPRINT=3 MULT=%d ICHARG=%d NPRINT=3 $end\n" % (mol_spin, mol_chrg)

        # tddft is not necessary, so...
        if 'TDDFT' in at_contrl:
            del t['TDDFT']

        # recover template
        self.template = t
        self.__geom_data()
        
        return
Example #41
    def modify_dimer(self):
        """
        setup ao overlap calc.
        """
        # load interface data from previous and current step
        filename = self.files
        i_pre = tools.load_data(filename['previous'])
        i_cur = tools.load_data(filename['current'])
        t = copy.deepcopy(self.template_cmp)

        # $data section in gms.
        # molecular spec.
        mol = self.__merge_mols([i_cur['mol'], i_pre['mol']])
        t['@DATA']['mol'] = mol
        t['@DATA']['title'] = "ONLY CHECK CALC, IGNORE WARINING.."
        # re-build $data section.

        # modify charge & spin (kept)
        #
        at_contrl = t['@CONTRL']
        if 'MULT' in at_contrl:
            mol_spin = int(at_contrl['MULT'])
        else:
            mol_spin = 1

        if "ICHARG" in at_contrl:
            mol_chrg = int(at_contrl['ICHARG']) * 2
        else:
            mol_chrg = 0

        t['CONTRL'] = " $contrl scftyp=rhf runtyp=energy exetyp=check\n \
        NPRINT=3 MULT=%d ICHARG=%d NPRINT=3 $end\n" % (mol_spin, mol_chrg)

        # tddft is not necessary, so...
        if 'TDDFT' in at_contrl:
            del t['TDDFT']

        # recover template
        self.template = t
        self.__geom_data()

        return
Example #42
def loadData(datatype):

    if datatype == 'gtsrb_binary':
        x_train, x_test, y_train, y_test = load_data('gtsrb_binary', 2)
        input_shape = 3*48*48
    elif datatype == 'cifar10_binary':
        x_train, x_test, y_train, y_test = load_data('cifar10_binary', 2)
        input_shape = 3*32*32
    elif datatype == 'cifar10':
        x_train, x_test, y_train, y_test = load_data('cifar10', 2)
        x_train = x_train.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        x_test = x_test.reshape((-1, 32, 32, 3)).transpose((0, 3, 1, 2)).astype(np.float32)
        input_shape = 3*32*32
    elif datatype == 'imagenet':
        x_train, x_test, y_train, y_test = load_data('imagenet', 2)
        x_train = x_train.reshape((-1, 3, 224, 224))
        x_test = x_test.reshape((-1, 3, 224, 224))
        input_shape = 3*224*224
    else:
        raise ValueError('unknown datatype: %s' % datatype)

    return x_train, x_test, y_train, y_test, input_shape
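Typical use of this dispatcher, assuming the underlying load_data can fetch the named dataset:

x_train, x_test, y_train, y_test, input_shape = loadData('cifar10')
print(x_train.shape, input_shape)  # e.g. (N, 3, 32, 32) and 3072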
Example #43
def main():
    train=load_data('train.csv')
    lbl_enc = preprocessing.LabelEncoder()
    train['target'] = lbl_enc.fit_transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train,y_train=split_data(train,feature_cols)
    clf_scores=clf_score(create_clf(),X_train[feature_cols],y_train)
    print clf_scores
    plt.plot(clf_scores)
    plt.xticks(range(len(clf_scores)), clf_scores.index, fontsize=14, rotation=90)
    plt.show()
Example #44
 def __init__(self):   
    
     self.data = tools.load_data(filename = "interface.json")
     # interface_converter(filename = qm_interface)
     self.files = {"interface": "interface.json", "dyn": "inp.json"}
     
     # global control variable, not very useful now for the case of turbomole
     self.config = {}
     self.dyn = {}
     
     self.worker()        
     return                
Example #45
    def modify_td_gs(self):
        """
        tddft input, nstates & root would be updated.
        """
        # current interface file data.
        i_cur = tools.load_data(self.files['interface'])
 
        t = copy.deepcopy(self.template_cmp)

        # %charge & spin was kept. none was required.
        # %molecular spec.     
        t['@DATA']['mol'] = i_cur['mol']
        t['@DATA']['title'] = "energy & gradient.."    
        # %routine
        # in dyn. interface, gs was 1, the first-es is 2, et al.
        # so there are n_es + 1 states
        # but gaussian, gamess, 'root=1' is first es.
        # so, x - 1 is ok
        n_state = int(i_cur['parm']['n_state']) - 1
        i_state = int(i_cur['parm']['i_state']) - 1

        if i_state == 0:
            i_state = 1
        else:
            print "excited state condition [ignored]"

        # TDDFT SECTION
        at_tddft = t['@TDDFT']
        at_tddft['NSTATE'] = str(n_state)
        at_tddft['IROOT'] = str(i_state)
        mystring = self.build_list(at_tddft)        
        t['TDDFT'] = " $TDDFT " + mystring

        # CONTRL SECTION
        at_contrl = t['@CONTRL']
        at_contrl['RUNTYP'] = "ENERGY"
        if 'MULT' not in at_contrl:
            at_contrl['MULT'] = "1"
        if "ICHARG" not in at_contrl:
            # print at_contrl['ICHARG']
            at_contrl['ICHARG'] = "0"

        mystring = self.build_list(at_contrl)
        t['CONTRL'] = " $CONTRL " + mystring
 
        # recover template
        self.template2 = t
 
        self.__geom_data()
        
        return
Example #46
    def modify_td(self):
        """
        tddft input, nstates & root would be updated.
        """
        # current interface file data.
        i_cur = tools.load_data(self.files['interface'])
 
        t = copy.deepcopy(self.template_cmp)
        
        # %charge & spin was kept. none was required.
        # %molecular spec.     
        t['mol'] = i_cur['mol']        
        # %routine
        # in dyn. interface, gs was 1, the first-es is 2, et al.
        # so there are n_es + 1 states
        # but gaussian, root=1 is first es.
        # so, x - 1 is ok
        n_state = int(i_cur['parm']['n_state']) - 1
        i_state = int(i_cur['parm']['i_state']) - 1
        routine = t['routine']
        content = routine['content']
        pat = re.compile("nstates=(\d+)", re.IGNORECASE)
        content = re.sub(pat, "nstates="+str(n_state), content)
        pat = re.compile("root=(\d+)", re.IGNORECASE)
        content = re.sub(pat, "root="+str(i_state), content)

        # calc force routine of gs if required
        pat = re.compile("(td\(.+?\))|(tda\(.+?\))|(cis\(.+?\))", re.IGNORECASE)
        force_content = re.sub(pat, "", content)
        # gs single point calc.
        pat = re.compile("force", re.IGNORECASE)
        sp_content = re.sub(pat, "", force_content)
        # new content routine for es
        pat = re.compile("force", re.IGNORECASE)
        es_content = re.sub(pat, " ", content)

        # assume the first occurrence of */* is in b3lyp/6-31G* style
        pat = re.compile("\/[\S]+")
        es_content = re.sub(pat, "/ChkBasis", es_content, count=1)
        pat = re.compile("\/[\S]+")
        force_content = re.sub(pat, "/ChkBasis", force_content, count=1)
        # assign value
        t['routine']['content'] = sp_content
        t['routine']['es_content'] =  content +  " geom=AllCheck Guess=Read "        
        if i_state == 0:
            t['routine']['force_content'] = force_content + " geom=AllCheck Guess=Read "       
            t['routine']['es_content'] =  es_content +  " geom=AllCheck Guess=Read " 
        # recover template
        self.template = t
        
        return
Example #47
def feature_selection_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=feature_engineering(train[feature_cols])
    X_test=feature_engineering(test[feature_cols])
    feature_cols=[col for col in X_train.columns if col not in ['mean','std','nonzero','feat_6','feat_82','feat_84']]
    X_train=X_train[feature_cols]
    X_test=X_test[feature_cols]
    print X_train.columns
    y=train['target']
    test_ids=test['id']
    print 'feature_selection_solution'
    cross_v(get_rf(),X_train.values,y.values)# mean 0.595288515439   std 0.593551044059 nonzero  0.597406303207
    #no fg 6 82 84 0.603600594376
    #0.600058535601
    clf=get_rf()
    clf.fit(X_train,y)
    preds = clf.predict_proba(X_test)
    write_submission(test_ids,preds,'submissions/feature_selection_rf100_84_82_6_nofg.csv')
Example #48
def xgboost_solution():
    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=train[feature_cols]
    X_test=test[feature_cols]
    y=train['target']
    test_ids=test['id']
    print 'rf_calibration_solution'
    dtrain = xgb.DMatrix(X_train, label=y)
    dtest = xgb.DMatrix(X_test, label=None)
    param = {'bst:max_depth':10, 'bst:min_child_weight': 4, 'bst:subsample': 0.5, 'bst:colsample_bytree':0.8,  'bst:eta':0.05}
    other = {'silent':1, 'objective':'multi:softprob', 'num_class':9, 'nthread': 4, 'eval_metric': 'mlogloss', 'seed':0}
    watchlist  = [(dtrain,'train')]
    full_param = other.copy()
    full_param.update(param)
    plst = full_param.items()
    bst= xgb.train(plst, dtrain, 300, watchlist)
    preds = bst.predict(dtest)
    write_submission(test_ids,preds,'submissions/xgboost_solution.csv')
Example #49
def xgboost_param_solution():
    xgb=XGBoostClassifier(alpha=0, booster='gbtree', colsample_bytree=0.459971793632,
         early_stopping_rounds=30, eta=0.0305648288294,
         eval_metric='mlogloss', gamma=0.0669039612464, l=0, lambda_bias=0,
         max_delta_step=4, max_depth=14, min_child_weight=8, nthread=4,
         ntree_limit=0, num_class=9, num_round=1000,
         objective='multi:softprob', seed=84425, silent=0,
         subsample=0.972607582489, use_buffer=True)

    train=load_data('train.csv')
    test=load_data('test.csv')
    le = preprocessing.LabelEncoder()
    le.fit(train['target'])
    train['target']=le.transform(train['target'])
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=train[feature_cols]
    X_test=test[feature_cols]
   
    y=train['target']
    test_ids=test['id']
    
    xgb.fit(X_train, y)
    preds=xgb.predict_proba(X_test)
    write_submission(test_ids,preds,'submissions/xgboost_param_solution_76.csv')
Example #50
    def prepare(self):
        """ load configure file """
        # dynamic info.
        self.dyn  = tools.load_data(self.files['dyn'])

        return     
Example #51
def main():
    train=load_data('train.csv')
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train,y_train=split_data(train,feature_cols)
    grid_search(X_train[feature_cols],y_train,get_clfs())
Example #52
    def collect_qm(self):
        """
        write the results down in one file
        """
        n_state = self.dim['n_state']
        interface = tools.load_data("interface.json")
        index_state = interface['parm']['i_state']
        # write files.
        fileout3 = open('qm_results.dat', 'w')
        # header
        fileout3.write('-----------------------------------------  \n')
        fileout3.write('Summary of QM calculations: \n')
        fileout3.write('-----------------------------------------  \n') 
        
        qm_interface = self.directory['root'] + "/" + "qm_interface"
        filein4=open(qm_interface, 'r')
        fileout3.write(filein4.read())
        fileout3.write('-----------------------------------------  \n')  
        filein4.close()

        fileout3.write('The electronic calculations focus on '+str(n_state)+' states: \n')
        for i_state in range(n_state) :
            fileout3.write('S'+str(i_state)+ '   ..  ' )
        fileout3.write('\n')
        fileout3.write('The S'+str(index_state-1)+' gradient should be computed !   \n') 
        fileout3.write('-----------------------------------------  \n')

        fileout3.write('Basis information: \n')
        filein4=open('qm_basis.dat','r') 
        fileout3.write(filein4.read())
        fileout3.write('-----------------------------------------  \n')
        filein4.close()

        fileout3.write('Energy of electronic states: \n')
        filein4=open('qm_energy.dat','r')
        fileout3.write(filein4.read())  
        fileout3.write('-----------------------------------------  \n')
        filein4.close()

        fileout3.write('Gradient on S'+str(index_state-1)+'  \n')
        filein4=open('qm_gradient.dat','r')
        fileout3.write(filein4.read())  
        fileout3.write('-----------------------------------------  \n')
        filein4.close()    
     
        fileout3.write('Nonadiabatic coupling elements  \n') 
        sourceFile = 'qm_nac.dat'
        if os.path.isfile(sourceFile):
            filein4=open('qm_nac.dat','r')
            fileout3.write(filein4.read())
            fileout3.write('-----------------------------------------  \n')
            filein4.close()
        else : 
            for i_state in range(n_state):
                for j_state in range(n_state):
                    fileout3.write('S'+str(i_state)+'    S'+str(j_state)+'   0.00000   \n')

        fileout3.write('-----------------------------------------  \n')               
        fileout3.close()         
            
        return
Example #53
from activation_functions import sigmoid_function, tanh_function, linear_function,\
								 LReLU_function, ReLU_function

from NeuralNet import NeuralNetwork
from tools import load_data
import numpy as np

# Load data from Hot_or_Not website scrape
male_images, male_scores, fem_images, fem_scores = load_data()
image_length = male_images.shape[1]

settings = {

	# Preset Parameters
	"n_inputs" 				:  image_length, 		# Number of input signals
	"n_outputs"				:  1, 					# Number of output signals from the network
	"n_hidden_layers"		:  1,					# Number of hidden layers in the network (0 or 1 for now)
	"n_hiddens"				:  100,   				# Number of nodes per hidden layer
	"activation_functions"	:  [ LReLU_function, sigmoid_function ],		# Activation functions by layer

	# Optional parameters

	"weights_low"			: -0.1,		# Lower bound on initial weight range
	"weights_high"			: 0.1,  	# Upper bound on initial weight range
	"save_trained_network"  : False,	# Save trained weights or not.

	"batch_size"			: 1, 		# 1 for stochastic gradient descent, 0 for gradient descent
}

# Initialization
network = NeuralNetwork( settings )
Example #54
    def prepare(self):
        """
        first, prepare work dir; then, the necessary files.
        """
        work_dir = self.directory["nac"]
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        if not os.path.exists(work_dir):
            os.makedirs(work_dir)

        sourceFile = self.directory["work_prev"] + "/mo.dat"
        destFile = self.directory["nac"] + "/mo_1.dat"
        shutil.copy2(sourceFile, destFile)

        sourceFile = self.directory["work_prev"] + "/ci.dat"
        destFile = self.directory["nac"] + "/ci_1.dat"
        shutil.copy2(sourceFile, destFile)

        sourceFile = self.directory["work"] + "/mo.dat"
        destFile = self.directory["nac"] + "/mo_2.dat"
        shutil.copy2(sourceFile, destFile)

        sourceFile = self.directory["work"] + "/ci.dat"
        destFile = self.directory["nac"] + "/ci_2.dat"
        shutil.copy2(sourceFile, destFile)

        sourceFile = self.directory["work"] + "/qm_results.dat"
        destFile = self.directory["nac"] + "/qm_results.dat"
        shutil.copy2(sourceFile, destFile)

        sourceFile = self.directory["work"] + "/" + self.files["dimension"]
        destFile = self.directory["nac"] + "/" + self.files["dimension"]
        shutil.copy2(sourceFile, destFile)

        sourceFile = self.directory["overlap"] + "/ao_overlap.dat"
        destFile = self.directory["nac"] + "/ao_overlap.dat"
        shutil.copy2(sourceFile, destFile)

        os.chdir(work_dir)

        # load internal data.
        filename = self.files["dimension"]
        dim = tools.load_data(filename)

        n_atom = dim["n_atom"]  # Number of atom
        n_state = dim["n_state"]  # Number of states
        n_ao = dim["n_basis"]  # number of basis functions
        n_occ = dim["nocc_allA"]  # number of occupied orbitals

        fileout1 = open("main_overlap_slater_input", "w")
        fileout1.write("                        read (*,*)  \n")
        fileout1.write("" + str(n_atom) + "               read (*,*) n_atom \n")
        fileout1.write("" + str(n_ao) + "                 read (*,*) n_ao \n")
        fileout1.write("" + str(n_occ) + "               read (*,*) n_ele_alpha \n")
        fileout1.write("" + str(n_occ) + "               read (*,*) n_ele_beta \n")
        fileout1.write("                        read (*,*)  \n")
        fileout1.write("" + str(n_state) + "               read (*,*) n_state \n")
        fileout1.write("                        read (*,*)  \n")
        fileout1.write("1                       read (*,*)  type_input  \n")
        fileout1.write("ci_1.dat                read (*,*)  filename_input1  \n")
        fileout1.write("ci_2.dat                read (*,*)  filename_input2  \n")
        fileout1.write("overlap.dat             read (*,*)  filename_input2  \n")
        fileout1.write("                        read (*,*)  \n")
        fileout1.write("0                       read (*,*) output_level  \n")
        fileout1.write("ci_overlap.dat          read (*,*) filename_output  \n")
        fileout1.close()

        print "NAC PREPARED"

        return
Example #55
print "L=", L
print "N=", N
print "R=", R
print "noise=", noise

# Determine output numbers from albedo data file
albedo_file = open(albedo_fname)
outputs = []
line = albedo_file.readline()
while line != "":
  if line.startswith("#") and "Output" in line:
    outputs.append(int(line.split("=")[1]))
  line = albedo_file.readline()
albedo_file.close()

its, temp, alb, veget = tools.load_data(run_fname)
albedo_file = open(albedo_fname)
temperature_file = open(temperature_fname)
vegetation_file = open(vegetation_fname)
for i in range(5):
  albedo_file.readline()
  temperature_file.readline()
  vegetation_file.readline()

# Vegetation custom 2-color colormap
veg_cmap = matplotlib.colors.ListedColormap(["white","darkgreen"])

print "Outputs found:", outputs
for out in outputs:

  albedo = []
Example #56
def main():
    print 'SSGPR Tuning'

    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage)
    generalgroup = optparse.OptionGroup(parser, 'General Options')
    datasetgroup = optparse.OptionGroup(parser, 'Dataset Options')
    optimizationgroup = optparse.OptionGroup(parser, 'Optimization Options')


    generalgroup.add_option('--projections', dest='projections', type='int', default=100, metavar='D', 
                      help='number of spectral projections (default: 100)')
    generalgroup.add_option('--nofixed', dest='fixed', action='store_false', default=True, 
                      help='tune sparse spectrum frequencies')
    generalgroup.add_option('--params', dest='params', default=None, metavar='[\sigma_n, \sigma_f, \ell_1,  ..., \ell_n]', 
                      help='set hyperparameters (takes precedence over --guess)')
    generalgroup.add_option('--seed', dest="seed", default=None, type='int', 
                      help='seed for the PRNG (default: None)')
    generalgroup.add_option('--yarp', dest="yarp", default=False, action='store_true', 
                      help='serialize to YARP learningMachine compatible files')

    # datasets for 4 different functions
    datasetgroup.add_option('-i', '--inputs', dest='inputs', default=None, metavar='IDX[,IDX]*', 
                      help='input column indices')
    datasetgroup.add_option('-o', '--outputs', dest='outputs', default=None, metavar='IDX[,IDX]*', 
                      help='output column indices')
    datasetgroup.add_option('--guess', dest='guess', default=None, metavar='DATASET', 
                      help='guess hyperparameters using specified dataset')
    datasetgroup.add_option('--optimize', dest='optimize', default=None, metavar='DATASET', 
                      help='optimize hyperparameters using specified dataset')
    datasetgroup.add_option('--train', dest='train', default=None, metavar='DATASET', 
                      help='train machine using specified dataset')
    datasetgroup.add_option('--test', dest='test', default=None, metavar='DATASET', 
                      help='test machine on specified dataset')

    # optimization options
    optimizationgroup.add_option('--solver', dest='solver', default='ralg', 
                      help='solver (default: ralg)')
    optimizationgroup.add_option('--verboseopt', dest='verboseopt', default=False, action='store_true', 
                      help='enable verbose optimization output')
    optimizationgroup.add_option('--ftol', dest='ftol', type='float', default=1e-4, metavar='TOL', 
                      help='function tolerance for stop condition (default: 1e-4)')
    optimizationgroup.add_option('--gtol', dest='gtol', type='float', default=1e-4, metavar='TOL', 
                      help='gradient tolerance for stop condition (default: 1e-4)')
    optimizationgroup.add_option('--maxtime', dest='maxtime', type='float', default=3600., metavar='SECONDS', 
                      help='maximum time (default: 3600)')
    optimizationgroup.add_option('--maxiters', dest='maxiters', type='int', default=2000, metavar='ITERS', 
                      help='maximum iterations (default: 2000)')
    optimizationgroup.add_option('--maxfevals', dest='maxfevals', type='int', default=5000, metavar='EVALS', 
                      help='maximum function evaluations (default: 5000)')

    parser.add_option_group(generalgroup)
    parser.add_option_group(datasetgroup)
    parser.add_option_group(optimizationgroup)

    (options, args) = parser.parse_args()

    numpy.random.seed(options.seed)

    input_cols = tools.strtoidx(options.inputs) or [0]
    output_cols = tools.strtoidx(options.outputs) or [-1]

    n = len(input_cols)
    p = len(output_cols)

    # some arbitrary default parameters and no hyperpriors
    sigma_o, sigma_o_prior = 2., NoPrior()
    l, l_prior = [10.] * n, [NoPrior()] * n
    sigma_n, sigma_n_prior = 0.2, NoPrior()

    # construct machine and feature mapping
    ssf = SparseSpectrumFeatures(n, nproj = options.projections, 
                                 sigma_o = sigma_o, sigma_o_prior = sigma_o_prior, 
                                 l = l, l_prior = l_prior, fixed = options.fixed)
    ssgpr = LinearGPR(n, p, ssf, sigma_n = sigma_n, sigma_n_prior = sigma_n_prior)

    print 'General Info'
    print '%12s: %s -> %s' % ('columns', input_cols, output_cols)
    print '%12s: %d' % ('#proj', options.projections)
    print '%12s: %s' % ('fixed', options.fixed)
    print '%12s: (%d -> %d) -> %d' % ('dimensions', n, ssf.outputdim(), p)
    print '%12s: %s' % ('seed', options.seed)


    # rudimentary guess of hyperparameters based on data
    if options.guess:
        print '\nHyperparameter Guess: %s' % (options.guess)

        guessx, guessy = tools.load_data(options.guess, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + guessx.shape + guessy.shape)

        start = time.time()
        ssgpr.guessparams(guessx, guessy)
        end = time.time()
        print '%12s: %d seconds' % ('timing', end - start)

    # set hyperparameters if given
    if options.params is not None:
        ssgpr.setparams(list(eval(options.params)))

    # optimize hyperparameters
    if options.optimize:
        print '\nHyperparameter Optimization: %s' % (options.optimize)

        hyperx, hypery = tools.load_data(options.optimize, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + hyperx.shape + hypery.shape)
        print '%12s: %s' % ('solver', options.solver)
        print '%12s: % g' % ('ftol', options.ftol)
        print '%12s: % g' % ('gtol', options.gtol)
        print '%12s: % g seconds' % ('max time', options.maxtime)
        print '%12s: % d' % ('max fevals', options.maxfevals)
        print '%12s: % d' % ('max iters', options.maxiters)

        start = time.time()
        res = ssgpr.optimize(hyperx, hypery, solver = options.solver, verbose = options.verboseopt, 
                       ftol = options.ftol, gtol = options.gtol, 
                       maxtime = options.maxtime, maxIter = options.maxiters, maxFunEvals = options.maxfevals, 
                       checkgrad = False)
        end = time.time()
        print '%12s: % g seconds' % ('timing', end - start)
        print '%12s: % g' % ('opt lml', res.ff)
        print '%12s: %s' % ('stop cond', res.msg)
        print '%12s: % d' % ('fevals', res.evals['f'])
        print '%12s: % d' % ('dfevals', res.evals['df'])
        print '%12s: % d' % ('iters', res.evals['iter'])

    # train ssgpr using dataset
    if options.train:
        print '\nTraining: %s' % (options.train)

        trainx, trainy = tools.load_data(options.train, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + trainx.shape + trainy.shape)

        start = time.time()
        ssgpr.train(trainx, trainy)
        end = time.time()
        print '%12s: % g seconds' % ('timing', end - start)


        lml = ssgpr.lmlfunc()
        print '%12s: % g' % ('lml', lml)

    # test ssgpr on dataset
    if options.test:
        print '\nTesting: %s' % (options.test)

        testx, testy = tools.load_data(options.test, input_cols, output_cols)
        print '%12s: [%d x %d] -> [%d x %d]' % (('data', ) + testx.shape + testy.shape)

        start = time.time()
        testy_p, testy_pv = ssgpr.predict(testx)
        end = time.time()
        print '%12s: % g seconds' % ('timing', end - start)

        se = (testy - testy_p)**2
        nse = se / testy.var(axis=0)
        lp = (se / testy_pv) + numpy.log(2. * numpy.pi) + numpy.log(testy_pv)

        print '%12s: %s' % ('mse', se.mean(axis=0))
        print '%12s: %s' % ('nmse', nse.mean(axis=0))
        print '%12s: %s' % ('nmlp', 0.5 * lp.mean(axis=0))

    if options.yarp:
        print '\nYarp LearningMachine Serialization'

        machine_fn = 'ssgpr.model'
        tools.serialize_machine(machine_fn, ssgpr)
        print '%12s: %s' % ('machine', machine_fn)

        preprocessor_fn = 'ssf.model'
        tools.serialize_preprocessor(preprocessor_fn, ssf)
        print '%12s: %s' % ('preproc', preprocessor_fn)
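The test section computes the squared error, the variance-normalized error, and the negative mean log predictive density; a toy check of those formulas, assuming numpy arrays of matching shape:

import numpy

testy = numpy.array([[1.0], [2.0], [3.0]])        # targets
testy_p = numpy.array([[1.1], [1.9], [3.2]])      # predictive means
testy_pv = numpy.array([[0.05], [0.05], [0.05]])  # predictive variances

se = (testy - testy_p) ** 2
nse = se / testy.var(axis=0)
lp = (se / testy_pv) + numpy.log(2. * numpy.pi) + numpy.log(testy_pv)
print(se.mean(axis=0), nse.mean(axis=0), 0.5 * lp.mean(axis=0))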
"""
from sklearn import ensemble
from tools import load_data
from sklearn.metrics import log_loss
from sklearn.calibration import CalibratedClassifierCV
from feature_engineering import feature_engineering
from sklearn import cross_validation
from tools import cross_v
import matplotlib.pyplot as plt
plt.style.use('ggplot')

def get_rf():
    forest=ensemble.RandomForestClassifier(n_estimators=100)
    return forest
    
train=load_data('train.csv')
feature_cols = [col for col in train.columns if col not in  ['id','target']] 
X_train=feature_engineering(train[feature_cols]).values
y=train['target'].values
X_train, X_test, y_train, y_test=cross_validation.train_test_split(X_train,y,test_size=0.33,random_state=1)

skf = cross_validation.StratifiedKFold(y_train, n_folds=10, random_state=42)
calibration_method = 'isotonic'
clf=get_rf()
ccv = CalibratedClassifierCV(base_estimator=clf, method=calibration_method, cv=skf)

#ccv.fit(X_train,y_train)
#pred=ccv.predict_proba(X_test)
clf.fit(X_train,y_train)
pred=clf.predict_proba(X_test)
score=log_loss(y_test,pred)
Example #58
  
lumin = min_lumin
while lumin <= max_lumin + lumin_step:

  lumins.append(lumin)
  teq_temps.append((lumin*917.0*(1-0.5)/ 5.670367e-8)**0.25-273.15)

  plt.clf()
  power_runs_avg = []
 
  foo = [] 
  for run in range(1,runs_per_lumin+1):
  
    fname = "L%.2f_r%02d.dat" % (lumin, run)
    print "Reading %s ..." % fname
    its, temp, albedo, veget = tools.load_data(datadir + fname)
    avg_temp = sum(temp)/float(len(temp))

    foo.append(avg_temp)
    
    # compute and plot power spectrum
    freqs, power = tools.power_spectrum(temp, sampling=1.0)
    freqs = freqs[1:]
    power = power[1:]
    if len(power_runs_avg) == 0:
      power_runs_avg = power
    else:
      for i in range(len(power_runs_avg)):
        power_runs_avg[i] += power[i]
    plt.loglog(freqs, power, color="blue", alpha=0.25)
Example #59
    def get_dim_info(self):
        """
        obtain dimension data.
        such as the number of atoms, etc.
        parses the gamess-us log file.
        """
        # default setting
        myobj = tools.load_data(self.files['interface'])
        self.dim['n_state'] = myobj['parm']['n_state']
        self.dim['i_state'] = myobj['parm']['i_state']
        # read 
        logfile = self.files['log']
        fp = open(logfile, "r")

        line = "STARTER"
        pat = re.compile("TOTAL NUMBER OF BASIS SET SHELLS")
        
        while line != "":
            line = fp.readline()
            m = pat.search(line)
            if m is not None:
                break
        # shell num.
        t_line = line
        # print t_line
        val = t_line.split("=")[1]
        n_shell = int(val)     
        
        # READ THE FOLLOWING LINES
        # 9 lines
        # TOTAL NUMBER OF BASIS SET SHELLS             =   10
        # NUMBER OF CARTESIAN GAUSSIAN BASIS FUNCTIONS =   38
        # NUMBER OF ELECTRONS                          =   14
        # CHARGE OF MOLECULE                           =    0
        # SPIN MULTIPLICITY                            =    1
        # NUMBER OF OCCUPIED ORBITALS (ALPHA)          =    7
        # NUMBER OF OCCUPIED ORBITALS (BETA )          =    7
        # TOTAL NUMBER OF ATOMS                        =    2
        # THE NUCLEAR REPULSION ENERGY IS       22.5117346394
        #       
        # number of cart gaussian basis functions
        t_line = fp.readline()
        val = t_line.split("=")[1]
        self.dim['n_basis'] = int(val)
        #print t_line
        # number of electrons
        t_line = fp.readline()
        val = t_line.split("=")[1]
        self.dim['n_elec'] = int(val)

        # mol. charge
        t_line = fp.readline()
        val = t_line.split("=")[1]
        charge = int(val)

        # spin-mult
        t_line = fp.readline()
        val = t_line.split("=")[1]
        spin = int(val)

        # number-occupied-orbitals-alpha
        t_line = fp.readline()
        val = t_line.split("=")[1]
        self.dim['neleA'] = int(val)        

        # number-occupied-orbitals-beta
        t_line = fp.readline()
        val = t_line.split("=")[1]
        self.dim['neleB'] = int(val)                
        #print line
        # number-of-atoms
        t_line = fp.readline()
        val = t_line.split("=")[1]
        self.dim['n_atom'] = int(val)

        # other
        self.dim['noccA'] = self.dim['neleA']
        self.dim['nvirA'] = self.dim['n_basis'] - self.dim['neleA']
        self.dim['nvir_allA'] = self.dim['nvirA']
        self.dim['nocc_allA'] = self.dim['noccA']
                 
        # TDDFT INPUT PARAMETERS
        pat = re.compile("TDDFT INPUT PARAMETERS")
        line = "starter"
        while line != "":
            line = fp.readline()
            m = pat.search(line)
            if m is not None:
                break
        line = fp.readline()
        # reading...
        #   NSTATE=       3  IROOT=       1   MULT=       1
        t_line = fp.readline()
        pat0 = re.compile("NSTATE=(.*)IROOT=(.*)MULT=(.*)")
        m = pat0.search(t_line)
        if m is not None:
            self.dim['n_state'] = int(m.group(1)) + 1 # because of the ground state.
            self.dim['i_state'] = int(m.group(2))
        else:
            print "<^WARNING> CANNOT FIND TD-DFT INPUT PARAMETERS SETTING: [suppose it to be ground state]"
         
        fp.close()
                
        tools.dump_data('dimension.json', self.dim)                

        return
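The parser relies on fixed "KEY = value" lines in the GAMESS-US log; a tiny standalone check of the split-based extraction it uses:

line = "TOTAL NUMBER OF BASIS SET SHELLS             =   10"
n_shell = int(line.split("=")[1])
print(n_shell)  # 10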
Example #60
def main():
    train=load_data('train.csv')
    feature_cols= [col for col in train.columns if col  not in ['target','id']]
    X_train=feature_engineering(train[feature_cols])
    y=train['target']
    grid_search(X_train,y,get_clfs())