Code Example #1
File: obj4.py  Project: Nicholas-t/Seelk_interview
def obj4_2(save):
    df_country = pd.read_parquet('data/output_country.parquet.gzip')
    countries = df_country.country.unique()
    country_indexes = {}
    for c in countries:
        country_indexes[c] = df_country.index[df_country['country'] ==
                                              c].tolist()
    df_numeric = pd.read_parquet('data/output_numeric.parquet.gzip')
    new = []
    for (country, indexes) in country_indexes.items():
        rows = df_numeric.iloc[indexes, :]
        temp = {
            'country': country,
            'avg_points': rows['points'].mean(axis=0),
            'sd_points': rows['points'].std(axis=0, ddof=0),
            'avg_price': rows['price'].mean(axis=0),
            'sd_price': rows['price'].std(axis=0, ddof=0)
        }
        new += [temp]
    #print(str(len(new))+' countries aggeregated')
    df_new = pd.DataFrame(new)
    #print(df_new)
    df_new.to_parquet('data/output_avg_std.parquet.gzip', compression='gzip')
    if save:
        mkdir('aggeregated')
        df_new.to_parquet('aggeregated/output_avg_std.parquet.gzip',
                          compression='gzip')
        print('folder "aggeregated" created')
Code Example #2
    def execute(self):
        model, cls_info, history, ft_history = self.fit_model()

        utils.mkdir(self.dst_dir, rm=True)
        model.save(self.est_file)

        mutils.save_model_info(self.info_file, self.graph_file, model)

        with open(self.cls_file, 'wb') as f:
            pickle.dump(cls_info, f)
        print(f'Classes: {cls_info}')

        utils.plot(history, self.hist_file)
        utils.plot(ft_history, self.ft_hist_file)

        def get_min(loss):
            min_val = min(loss)
            min_ind = loss.index(min_val)
            return min_val, min_ind

        print('Before fine-tuning')
        min_val, min_ind = get_min(history['val_loss'])
        print(f'val_loss: {min_val} (Epochs: {min_ind + 1})')

        print('After fine-tuning')
        min_val, min_ind = get_min(ft_history['val_loss'])
        print(f'val_loss: {min_val} (Epochs: {min_ind + 1})')
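Every snippet in this listing calls a project-specific `mkdir` helper rather than `os.mkdir` directly: example #1 uses a bare `mkdir(path)`, while example #2 calls `utils.mkdir(self.dst_dir, rm=True)`. A minimal sketch of such a helper, assuming an `os.makedirs`-based implementation in which `rm=True` wipes any existing directory first (the actual `utils` modules are not shown in these excerpts):

import os
import shutil

def mkdir(path, rm=False):
    """Create path (including parents) if it does not exist; with rm=True, remove any existing directory first."""
    if rm and os.path.isdir(path):
        shutil.rmtree(path)
    os.makedirs(path, exist_ok=True)

Example #4 below relies on the same `rm=True` behaviour to start from an empty output directory before writing its reports.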
Code Example #3
def load_agw_1d(base_dir, get_feats=False):
    if not path.exists(base_dir + '/agw_data'):
        mkdir(base_dir + '/agw_data')
        urllib.urlretrieve(
            'https://raw.githubusercontent.com/wjmaddox/drbayes/master/experiments/synthetic_regression/ckpts/data.npy',
            filename=base_dir + '/agw_data/data.npy')

    def features(x):
        return np.hstack([x[:, None] / 2.0, (x[:, None] / 2.0)**2])

    data = np.load(base_dir + '/agw_data/data.npy')
    x, y = data[:, 0], data[:, 1]
    y = y[:, None]
    f = features(x)

    x_means, x_stds = x.mean(axis=0), x.std(axis=0)
    y_means, y_stds = y.mean(axis=0), y.std(axis=0)
    f_means, f_stds = f.mean(axis=0), f.std(axis=0)

    X = ((x - x_means) / x_stds).astype(np.float32)
    Y = ((y - y_means) / y_stds).astype(np.float32)
    F = ((f - f_means) / f_stds).astype(np.float32)

    if get_feats:
        return F, Y

    return X[:, None], Y
Code Example #4
    def execute(self):
        estimator = load_model(self.est_file)

        with open(self.cls_file, 'rb') as f:
            cls_info = pickle.load(f)

        pred_labels, true_labels, output = [], [], []

        for subdir in os.listdir(self.src_dir):
            for f in os.listdir(os.path.join(self.src_dir, subdir)):
                filename = os.path.join(self.src_dir, subdir, f)
                img = utils.load_target_image(filename, self.input_size)
                pred_class = np.argmax(estimator.predict(img))
                pred_label = cls_info[pred_class]
                pred_labels.append(pred_label)

                true_label = subdir
                true_labels.append(true_label)

                output.append(f'{filename} -> {pred_label}')

        report = classification_report(true_labels, pred_labels)
        labels = list(cls_info.values())
        cnfmtx = confusion_matrix(true_labels, pred_labels, labels=labels)
        cm = pd.DataFrame(cnfmtx, index=labels, columns=labels)

        utils.mkdir(self.dst_dir, rm=True)
        with open(self.drs_file, 'w') as f:
            f.write('\n'.join(output))

        with open(self.srs_file, 'w') as f:
            f.write(report)
            f.write('\n\n')
            f.write(str(cm))
            f.write('\n')
Code Example #5
 def singleVolume(cls, vid: int, name: str):
     c = request(f"http://{cls.downSite}.wenku8.com/packtxt.php?aid={cls.novel.id}&vid={vid}&charset=gbk",
                 SelfUser.cookies)
     chapter_dir = cls.root_dir + "/" + name
     mkdir(chapter_dir)
     with open(chapter_dir + "/" + name + ".txt", "w") as f:
         f.write(no_utf8_code(c.text))
Code Example #6
    def save_model(self):
        """
        保存模型
        :return: 保存.pkl文件
        """
        mkdir(self.args.result_path)
        torch.save(self.model.state_dict(), self.args.result_path + 'GAT_PPI_model.pkl')

        print("模型已保存成功!")
Code Example #7
    def save_model(self):
        """
        保存模型
        """
        mkdir(self.args.result_path)
        torch.save(
            self.model.state_dict(), self.args.result_path + self.args.model +
            '_' + self.args.dataset_name + '_model.pkl')

        print("模型已保存成功!")
Code Example #8
File: obj2.py  Project: Nicholas-t/Seelk_interview
def obj2(save):
    df = pd.read_csv('data/data.csv')
    df.to_parquet('data/output.parquet.gzip', compression='gzip')
    if save:
        mkdir('original')
        df.to_parquet('original/output.parquet.gzip', compression='gzip')
        print('folder "original" created')


#print(df)
Code Example #9
 def pictures(cls, cid, is_resize, name):
     imgs = get_imgs(status=cls.novel.statusCode, aid=cls.novel.id, cid=cid, cookies=SelfUser.cookies)
     chapter_dir = cls.root_dir + "/" + name
     mkdir(chapter_dir)
     imgs_dir = chapter_dir + "/" + "插图"
     mkdir(imgs_dir)
     # enumerate gives a stable index even if the same URL appears twice; resize only when requested
     for idx, img_url in enumerate(imgs):
         with open(imgs_dir + "/" + str(idx) + ".jpg", "wb") as f:
             content = request(img_url).content
             f.write(resize(content) if is_resize else content)
Code Example #10
File: obj3.py  Project: Nicholas-t/Seelk_interview
def obj3(save):
    numeric = ['id', 'points', 'price']
    df = pd.read_parquet('data/output.parquet.gzip')
    df_clean = df.filter(numeric, axis=1).dropna().reset_index()
    df_clean.to_parquet('data/output_numeric.parquet.gzip', compression='gzip')
    if (save):
        mkdir('clean')
        df_clean.to_parquet('clean/output_numeric.parquet.gzip',
                            compression='gzip')
        print('folder "clean" created')
Code Example #11
File: gat_ppi_test.py  Project: Sanchez2020/Demo
 def save_embedding(self, emds):
     """
     保存嵌入结果
     :param emds: 测试集的嵌入表示列表
     :return: 如:embedding_1.csv
     """
     print("正在保存嵌入结果...")
     mkdir(self.args.result_path)
     count = 1
     for o in emds:
         result = pd.DataFrame(o.cpu().numpy())
         result.to_csv(self.args.result_path + 'embedding_' + str(count) + '.csv', index=None)
         count += 1
     print("嵌入结果保存成功!")
Code Example #12
File: gat_ppi_test.py  Project: Sanchez2020/Demo
 def save_preds(self, preds):
     """
     保存预测值
     :param preds: 测试集的预测值列表
     :return: 如:pred_1.csv
     """
     print("正在保存测试集的预测值...")
     mkdir(self.args.result_path)
     count = 1
     for o in preds:
         result = pd.DataFrame(o.cpu().numpy())
         result.to_csv(self.args.result_path + 'pred_' + str(count) + '.csv', index=None)
         count += 1
     print("测试集的预测值保存成功!")
Code Example #13
File: gat_ppi_test.py  Project: Sanchez2020/Demo
 def save_reals(self, reals):
     """
     保存真实值
     :param reals: 测试集的真实值列表
     :return: 如:real_1.csv
     """
     print("正在保存测试集的真实值...")
     mkdir(self.args.result_path)
     count = 1
     for o in reals:
         result = pd.DataFrame(o.cpu().numpy())
         result.to_csv(self.args.result_path + 'real_' + str(count) + '.csv', index=None)
         count += 1
     print("测试集的真实值保存成功!")
Code Example #14
 def find_address(self, address):
     """return path where net and training info are saved"""
     if address == 'last':
         addresses = sorted(os.listdir(self.res_dir))
         tb_address = os.path.join(self.tb_dir, str(len(addresses)))
         address = os.path.join(self.res_dir, addresses[-1])
     elif address is None:
         now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
         address = os.path.join(self.res_dir, now)
         mkdir(address)
         tb_address = os.path.join(self.tb_dir, now)
     else:
         tb_address = None
     return address, tb_address
Code Example #15
def load_matern_1d(base_dir):
    if not path.exists(base_dir + '/matern_data/'):
        mkdir(base_dir + '/matern_data/')

        def gen_1d_matern_data():
            from GPy.kern.src.sde_matern import Matern32
            np.random.seed(4)

            lengthscale = 0.5
            variance = 1.0
            sig_noise = 0.15

            n1_points = 200
            x1 = np.random.uniform(-2, -1, n1_points)[:, None]

            n2_points = 200
            x2 = np.random.uniform(0.5, 2.5, n2_points)[:, None]

            no_points = n1_points + n2_points
            x = np.concatenate([x1, x2], axis=0)
            x.sort(axis=0)

            k = Matern32(input_dim=1,
                         variance=variance,
                         lengthscale=lengthscale)
            C = k.K(x, x) + np.eye(no_points) * sig_noise**2

            y = np.random.multivariate_normal(np.zeros(no_points), C)[:, None]

            x_means, x_stds = x.mean(axis=0), x.std(axis=0)
            y_means, y_stds = y.mean(axis=0), y.std(axis=0)

            X = ((x - x_means) / x_stds).astype(np.float32)
            Y = ((y - y_means) / y_stds).astype(np.float32)

            return X, Y

        x, y = gen_1d_matern_data()
        xy = np.concatenate([x, y], axis=1)
        np.save(base_dir + '/matern_data/matern_1d.npy', xy)
        return x, y
    else:
        xy = np.load(base_dir + '/matern_data/matern_1d.npy')
        x = xy[:, 0]
        x = x[:, None]
        y = xy[:, 1]
        y = y[:, None]
        return x, y
Code Example #16
 def loop_test(self, dataset, criterion):
     """Forward loop over test data"""
     self.net.eval()
     for i in range(len(dataset)):
         seq = dataset.sequences[i]
         us, xs = dataset[i]
         with torch.no_grad():
             hat_xs = self.net(us.cuda().unsqueeze(0))
         loss = criterion(xs.cuda().unsqueeze(0), hat_xs)
         mkdir(self.address, seq)
         mondict = {
             'hat_xs': hat_xs[0].cpu(),
             'loss': loss.cpu().item(),
         }
         pdump(mondict, self.address, seq, 'results.p')
Code Example #17
def main(args):
    dataset = args.dataset
    split = args.split
    method = args.method
    network = args.network
    width = args.width
    batch_size = args.batch_size
    valprop = args.valprop
    num = args.num
    data_dir = args.data_folder
    hpo_results_dir = args.hpo_results_dir
    results_dir = args.results_dir

    if dataset == "flights":
        split = "800k" if split == "1" else "2M"

    df = get_best_configs(hpo_results_dir)
    df = df[(df.dataset == dataset) & (df.split == split) & (df.method == method) &
            (df.network == network) & (df.valprop == str(valprop)) &
            (df.width == str(width)) & (df.batch_size == str(batch_size))]
    if len(df) > 0:
        config = df.to_dict('records')[0]
    else:
        raise RuntimeError("HPO results for chosen config not found.")

    epochs = int(config["best_itr"])

    save_path = f"{results_dir}/{dataset}/{split}/{valprop}/{method}/{network}/{width}/{batch_size}/{num}"
    mkdir(save_path)

    # create data
    trainset, testset, N_train, input_dim, output_dim = get_dset_split(dataset, split, data_dir)

    # create net
    if "MLP" in method:
        method = method[:-4]

    keep_trying = True
    while keep_trying:
        net = create_net(method, config, input_dim, output_dim, N_train, network, width, cuda)

        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                                  shuffle=True, num_workers=0, pin_memory=cuda)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                                 shuffle=False, num_workers=0, pin_memory=cuda)

        # train net
        keep_trying = train_loop(net, trainloader, testloader, epochs, save_path)
Code Example #18
    def save_result(self):
        """
        保存节点预测结果
        :return: 以csv文件存入文件
        """
        mkdir(self.args.result_path)
        pre = self.model()[self.data.test_mask].max(1)[1].view(1000, 1)
        real = self.data.y[self.data.test_mask].view(1000, 1)
        result = torch.cat((pre, real), 1).detach().cpu().numpy()
        index = ["node_" + str(x) for x in range(1000)]
        columns = ["prediction", "real"]
        result = pd.DataFrame(result, index=index, columns=columns)
        result.to_csv(self.args.result_path + self.args.model + '_' +
                      self.args.dataset_name + '_result.csv',
                      index=None)

        print("测试集的节点预测结果保存成功!")
Code Example #19
def load_official_flight(base_dir, k800=False):
    if not path.exists(base_dir + '/flight'):
        mkdir(base_dir + '/flight')

    if not path.isfile(base_dir + '/flight/filtered_data.pickle'):
        urllib.urlretrieve(
            'https://javierantoran.github.io/assets/datasets/filtered_flight_data.pickle.zip',
            filename=base_dir + '/flight/filtered_flight_data.pickle.zip')

        with zipfile.ZipFile(
                base_dir + '/flight/filtered_flight_data.pickle.zip',
                'r') as zip_ref:
            zip_ref.extractall(base_dir + '/flight/')

    file1 = base_dir + '/flight/filtered_data.pickle'
    filtered = pd.read_pickle(file1)

    inputs = filtered[[
        'Month', 'DayofMonth', 'DayOfWeek', 'DepTime', 'ArrTime', 'AirTime',
        'Distance', 'plane_age'
    ]].values

    outputs = filtered[['ArrDelay']].values

    if k800 is False:
        X_train = inputs[:-100000].astype(np.float32)
        y_train = outputs[:-100000].astype(np.float32)
        X_test = inputs[-100000:].astype(np.float32)
        y_test = outputs[-100000:].astype(np.float32)
    else:
        X_train = inputs[:700000].astype(np.float32)
        y_train = outputs[:700000].astype(np.float32)
        X_test = inputs[700000:800000].astype(np.float32)
        y_test = outputs[700000:800000].astype(np.float32)

    x_means, x_stds = X_train.mean(axis=0), X_train.std(axis=0)
    y_means, y_stds = y_train.mean(axis=0), y_train.std(axis=0)

    x_stds[x_stds < 1e-10] = 1.

    X_train = ((X_train - x_means) / x_stds)
    y_train = ((y_train - y_means) / y_stds)
    X_test = ((X_test - x_means) / x_stds)
    y_test = ((y_test - y_means) / y_stds)

    return X_train, X_test, x_means, x_stds, y_train, y_test, y_means, y_stds
Code Example #20
    def save_embedding(self):
        """
        保存嵌入结果
        :return: 以csv格式存入文件
        """
        mkdir(self.args.result_path)
        embedding = self.model.hidden_representations[-2][
            self.data.test_mask].detach().cpu().numpy()

        print("正在对测试集的嵌入结果进行降维可视化...")

        X = embedding
        Y = self.data.y[self.data.test_mask].detach().cpu().numpy()
        GATTrainer.embed_visualization(self, X=X, Y=Y)

        index = ["node_" + str(x) for x in range(1000)]
        columns = ["x_" + str(x) for x in range(len(embedding[0]))]
        embedding = pd.DataFrame(embedding, index=index, columns=columns)
        embedding.to_csv(self.args.result_path + self.args.model + '_' +
                         self.args.dataset_name + '_embedding.csv',
                         index=None)

        print("测试集的节点嵌入表示保存成功!")
def imageCorrection(image_map, category_id_name):
    img_file = dir_name + image_map['file_name']
    color = (0, 255, 0)
    x = int(image_map['x'])
    y = int(image_map['y'])
    w = int(image_map['w'])
    h = int(image_map['h'])

    img = cv2.imread(img_file)
    # frame_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    frame_gray = img
    # x = max(x, y)
    # w = max(w, h)
    # y = x
    # h = w

    img_tmp = frame_gray[y:y + h, x:x + w]
    # img_tmp = frame_gray[0:frame_gray.shape[0], 0:frame_gray.shape[1]]

    # cv2.rectangle(frame_gray, (x, y), (x+w, y+h), color, thickness=2)

    directory = "../datas/" + category_id_name + "_feature/"
    utils.mkdir(directory)
    cv2.imwrite(directory + image_map['file_name'], img_tmp)
Code Example #22
# from html4vision import Col, imagetable
from src.utils import mkdir

do_scores = False
do_html = False

dir_data = '../data/v0.1.2/'
dir_interim = '../data/interim/'
f_datalist = f'{dir_data}lists/verification_pairs_list_5_fold.pkl'
f_features = f'{dir_interim}features-sphereface-off-the-shelf.pkl'

dir_out = '../results/verification/off-the-shelf-sphereface/'

datatable = pd.read_pickle(f_datalist)

mkdir(dir_out)
if do_scores or 'score' not in datatable:
    features = pd.read_pickle(f_features)

    features = {k.replace('../', ''): v for k, v in features.items()}
    datatable['score'] = datatable.apply(
        lambda row: np.dot(features[row['p1']], features[row['p2']]), axis=1)

ts_matches = []
sim = []
thresholds = np.arange(datatable.score.values.min(),
                       datatable.score.values.max(), 100)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
Code Example #23
File: main.py  Project: 0x7FFFFF/wenku8downloader
     try:
         self_user = SelfUser(account, password)
         print("Login successful!")
         cookies = self_user.cookies
         print("Saving cookies...")
         pickle.dump(cookies, open(cookiePath, 'wb'))
         print("Cookies saved!")
     except LoginFailedError:
         print("Login failed!")
         sys.exit()
 isResize = input("Resize images? (Y/N): ").lower() == "y"
 while True:
     try:
         id = input("请输入作品ID:")
         novel = Novel(int(id))
         mkdir("download")
         root_dir = os.path.dirname('.') + "download/" + novel.title
         print(f"正在下载{novel.title}...")
         mkdir(root_dir)
         for i in novel.volumeList:
             chapter_dir = root_dir + "/" + i["name"]
             mkdir(chapter_dir)
             print(f"正在下载{i['name']}")
             for j in i["chapters"]:
                 if j["name"] == "插图":
                     print(f"下载{i['name']}插图中...")
                     imgs_dir = chapter_dir + "/" + "插图"
                     mkdir(imgs_dir)
                     imgs = get_imgs(status=novel.statusCode,
                                     aid=novel.id,
                                     cid=j['cid'],
Code Example #24
config = load_yaml()
Feature.dir = config["path"]["feature"]
"""
import category_encoders as ce
class NeighborhoodOrdinal(Feature):
    def create_features(self):
        # write the column names needed for feature generation into self.columns
        self.columns = ["Neighborhood"]
        self.load(self.columns)
        oe = ce.ordinal.OrdinalEncoder()

        self.train["Neighborhood_ordinal"] = oe.fit_transform(
            self.train["Neighborhood"]
        )
        self.test["Neighborhood_ordinal"] = oe.transform(self.test["Neighborhood"])
        create_memo("Neighborhood_ordinal", "Neighborhood をラベル化した")
"""

if __name__ == "__main__":
    # split train / test by column and save them
    save_column()

    # write the CSV header
    create_memo("feature", "memo")

    mkdir(Feature.dir)
    args = get_arguments()

    generate_features(globals(), args.overwrite)
Code Example #25
File: runner.py  Project: takaiyuk/kaggle-instacart
import argparse
import os

from abstractRunner import AbstractRunner
from src.utils import mkdir, load_yaml

parser = argparse.ArgumentParser(description="argparse for run.py")
parser.add_argument("--debug", action="store_true", help="debug mode")
parser.add_argument(
    "--model",
    default="lgb",
    required=False,
    choices=["lgb", "cb", "xgb", "nn", "linear"],
    help="model type",
)
p = vars(parser.parse_args())


class Runner(AbstractRunner):
    def __init__(self, parser, config):
        super().__init__(parser, config)


if __name__ == "__main__":
    config = load_yaml()
    for k, v in config["path"].items():
        # mkdir if the path points to a directory rather than a file
        if os.path.splitext(os.path.basename(v))[1] == "":
            mkdir(v)
    Runner(p, config).run()
Code Example #26
    ('lucene', 'v4.0'),
    ('mahout', 'v0.8'),
    ('openjpa', 'v2.0.1'),
    ('openjpa', 'v2.2.0'),
    ('pig', 'v0.8.0'),
    ('pig', 'v0.11.1'),
    ('solr', 'v4.4.0'),
    ('tika', 'v1.3'),
    ('zookeeper', 'v3.4.5'),
]

#projects = [('hibernate', 'v3.5.0b2')]
for project, version in projects:
    path = '/'.join(['data', project, version])
    print(path)
    mkdir(path + '/queries')

    with open(path + '/ids.txt') as f:
        bugs = [x.strip() for x in f]

    p = etree.XMLParser()
    hp = etree.HTMLParser()

    for bugid in bugs:
        print("Fetching bugid", bugid)
        fname = project.upper() + '-' + bugid
        #        fname = 'HHH-' + bugid
        r = requests.get(url_base % (fname, fname))
        try:
            tree = etree.parse(StringIO(r.text), p)
        except etree.XMLSyntaxError:
Code Example #27
def main(args):
    extra_string = ''

    if args.dataset == 'flights':
        if args.n_split == 0:
            extra_string += '_2M'
        elif args.n_split == 1:
            extra_string += '_800k'
        else:
            raise Exception(
                'Only Valid values for flight splits are 0 (2M) or 1 (800k)')
        extra_string += '_valprop_' + str(args.valprop)

    elif args.dataset in [
            'boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm',
            'naval', 'protein', 'boston_gap', 'concrete_gap', 'energy_gap',
            'power_gap', 'wine_gap', 'yacht_gap', 'kin8nm_gap', 'naval_gap',
            'protein_gap'
    ]:
        extra_string += '_split_' + str(args.n_split)
        extra_string += '_valprop_' + str(args.valprop)

    working_dir = args.result_folder + '/' + args.dataset + extra_string + '/' + args.method +\
        ('-' + args.network if args.network != "ResNet" else '') + '/' + str(args.width) + '/' + str(args.batch_size) +\
        '/' + args.run_id
    print("WORKING DIR")
    print(working_dir)

    # Create data dir if necessary
    if not os.path.exists(args.data_folder):
        mkdir(args.data_folder)

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=working_dir,
                                             overwrite=False)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=working_dir)
    ns_host, ns_port = NS.start()

    workers = []
    for i in range(args.num_workers):
        print("CREATING WORKER:", i)
        if args.dataset == 'spirals':
            worker_class = create_SpiralsWorker(args.method, args.network,
                                                args.width, args.batch_size)
            worker = worker_class(early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i)
        elif args.dataset == 'flights':
            worker_class = create_FlightWorker(args.method, args.network,
                                               args.width, args.batch_size)
            worker = worker_class(base_dir=args.data_folder,
                                  prop_val=args.valprop,
                                  k800=(args.n_split == 1),
                                  early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i)
        elif args.dataset in [
                'boston', 'concrete', 'energy', 'power', 'wine', 'yacht',
                'kin8nm', 'naval', 'protein', 'boston_gap', 'concrete_gap',
                'energy_gap', 'power_gap', 'wine_gap', 'yacht_gap',
                'kin8nm_gap', 'naval_gap', 'protein_gap'
        ]:
            worker_class = create_UCIWorker(args.method, args.network,
                                            args.width, args.batch_size)
            worker = worker_class(dname=args.dataset,
                                  base_dir=args.data_folder,
                                  prop_val=args.valprop,
                                  n_split=args.n_split,
                                  early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i)
        else:
            raise ValueError('Dataset not implemented yet!')

        worker.run(background=True)
        workers.append(worker)

    n_iterations = args.n_iterations
    previous_run = None
    if args.previous_result_folder is not None:
        try:
            previous_run = hpres.logged_results_to_HBS_result(
                args.previous_result_folder)
        except Exception as e:
            print(e)

    # Run an optimizer
    bohb = BOHB(
        configspace=worker.get_configspace(),
        run_id=args.run_id,
        host=host,
        nameserver=ns_host,
        nameserver_port=ns_port,
        result_logger=result_logger,
        min_budget=args.min_budget,
        max_budget=args.max_budget,
        previous_result=previous_run,
    )

    res = bohb.run(n_iterations=n_iterations, min_n_workers=args.num_workers)

    # store results
    with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    all_runs = res.get_all_runs()

    print('Best found configuration:', id2config[incumbent]['config'])
    print('A total of %i unique configurations were sampled.' %
          len(id2config.keys()))
    print('A total of %i runs were executed.' % len(res.get_all_runs()))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum([r.budget for r in all_runs]) / args.max_budget))
    print('The run took  %.1f seconds to complete.' %
          (all_runs[-1].time_stamps['finished'] -
           all_runs[0].time_stamps['started']))
Code Example #28
# pull the state/action space dimensions and the action bound from the environment
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# create the policy, the replay buffer, and the initial evaluation list
policy = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
evaluations = [evaluate_policy(env, policy)]

# create monitoring folders
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
# maximum number of steps per episode
max_episode_steps = env._max_episode_steps

# whether or not to save the video
save_env_vid = False
if save_env_vid:
    env = wrappers.Monitor(env, monitor_dir, force=True)
    env.reset()

# initialize the training variables
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
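Examples #16 and #28 call `mkdir` with two arguments (`mkdir(self.address, seq)`, `mkdir('exp', 'brs')`), and #28 uses the return value as a path (`work_dir = mkdir('exp', 'brs')`). A minimal sketch of such a variant, assuming it joins the parts, creates the directory if it is missing, and returns the resulting path (the helpers actually used in those projects are not shown in these excerpts):

import os

def mkdir(base, *parts):
    """Join base with any extra path parts, create the directory if needed, and return its path."""
    path = os.path.join(base, *parts)
    os.makedirs(path, exist_ok=True)
    return path

# e.g. work_dir = mkdir('exp', 'brs')          -> creates ./exp/brs and returns 'exp/brs'
#      monitor_dir = mkdir(work_dir, 'monitor')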
Code Example #29
File: train_DUN.py  Project: omegafragger/DUN
def train_loop(net,
               dname,
               data_dir,
               epochs=90,
               workers=4,
               resume='',
               savedir='./',
               save_all_epochs=False,
               q_nograd_its=0,
               batch_size=256):
    mkdir(savedir)
    global best_err1

    # Load data here:
    _, train_loader, val_loader, _, _, Ntrain = \
        get_image_loader(dname, batch_size, cuda=True, workers=workers, distributed=False, data_dir=data_dir)

    net.N_train = Ntrain

    start_epoch = 0

    marginal_loglike = np.zeros(epochs)
    train_loss = np.zeros(epochs)
    dev_loss = np.zeros(epochs)

    err_train = np.zeros(epochs)
    err_dev = np.zeros(epochs)

    # optionally resume from a checkpoint
    if resume:
        if os.path.isfile(resume):
            print("=> loading checkpoint '{}'".format(resume))
            start_epoch, best_err1 = net.load(resume)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume, start_epoch))
        else:
            print("=> no checkpoint found at '{}'".format(resume))

        candidate_progress_file = resume.split('/')
        candidate_progress_file = '/'.join(
            candidate_progress_file[:-1]) + '/stats_array.pkl'

        if os.path.isfile(candidate_progress_file):
            print("=> found progress file at '{}'".format(
                candidate_progress_file))
            try:
                marginal_loglike, err_train, train_loss, err_dev, dev_loss = \
                    load_object(candidate_progress_file)
                print("=> Loaded progress file at '{}'".format(
                    candidate_progress_file))
            except Exception:
                print("=> Unable to load progress file at '{}'".format(
                    candidate_progress_file))
        else:
            print("=> NOT found progress file at '{}'".format(
                candidate_progress_file))

    if q_nograd_its > 0:
        net.prob_model.q_logits.requires_grad = False

    for epoch in range(start_epoch, epochs):
        if q_nograd_its > 0 and epoch == q_nograd_its:
            net.prob_model.q_logits.requires_grad = True

        tic = time.time()
        nb_samples = 0
        for x, y in train_loader:
            marg_loglike_estimate, minus_loglike, err = net.fit(x, y)

            marginal_loglike[epoch] += marg_loglike_estimate * x.shape[0]
            err_train[epoch] += err * x.shape[0]
            train_loss[epoch] += minus_loglike * x.shape[0]
            nb_samples += len(x)

        marginal_loglike[epoch] /= nb_samples
        train_loss[epoch] /= nb_samples
        err_train[epoch] /= nb_samples

        toc = time.time()

        # ---- print
        print('\n depth approx posterior',
              net.prob_model.current_posterior.data.cpu().numpy())
        print(
            "it %d/%d, ELBO/evidence %.4f, pred minus loglike = %f, err = %f" %
            (epoch, epochs, marginal_loglike[epoch], train_loss[epoch],
             err_train[epoch]),
            end="")
        cprint('r', '   time: %f seconds\n' % (toc - tic))

        net.update_lr()

        # ---- dev
        tic = time.time()
        nb_samples = 0
        for x, y in val_loader:
            minus_loglike, err = net.eval(x, y)

            dev_loss[epoch] += minus_loglike * x.shape[0]
            err_dev[epoch] += err * x.shape[0]
            nb_samples += len(x)

        dev_loss[epoch] /= nb_samples
        err_dev[epoch] /= nb_samples

        toc = time.time()

        cprint('g',
               '     pred minus loglike = %f, err = %f\n' %
               (dev_loss[epoch], err_dev[epoch]),
               end="")
        cprint('g', '    time: %f seconds\n' % (toc - tic))

        filename = 'checkpoint.pth.tar'
        if save_all_epochs:
            filename = str(epoch) + '_' + filename
        net.save(os.path.join(savedir, filename), best_err1)
        if err_dev[epoch] < best_err1:
            best_err1 = err_dev[epoch]
            cprint('b', 'best top1 dev err: %f' % err_dev[epoch])
            shutil.copyfile(os.path.join(savedir, filename),
                            os.path.join(savedir, 'model_best.pth.tar'))

        all_results = [
            marginal_loglike, err_train, train_loss, err_dev, dev_loss
        ]
        save_object(all_results, os.path.join(savedir, 'stats_array.pkl'))
Code Example #30
 def save(self, path):
     mkdir(path)
     self.saver.save(self.sess, path + self.NAME)
     pickle_dump(self.dm, path + 'dm.pkl')
     logger.info("Model saved to '{}'".format(path))