Example 1
    def load_images(self, img_size):
        if os.path.exists(self.data_path + f"/image_data_{img_size}.plk"):
            with open(self.data_path + f"/image_data_{img_size}.plk",
                      "rb") as fp:
                data = pickle.load(fp)
        else:
            file_name = "CUB_200_2011.tgz"
            url = "http://www.vision.caltech.edu.s3-us-west-2.amazonaws.com/visipedia-data/CUB-200-2011/CUB_200_2011.tgz"
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            if len(glob.glob(self.data_path + "/images/*.jpg")) != IMAGE_SIZE:
                print("Info:Extracting image data from tar file")
                import tarfile, shutil
                with tarfile.open(self.data_path + f"/{file_name}",
                                  'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                    shutil.move(self.data_path + "/CUB_200_2011/images",
                                self.data_path)

            data = {}
            files = glob.glob(self.data_path + "/images/*/*.jpg")
            print(f"Info:load {img_size}x{img_size} image data")
            for i, _path in enumerate(files):
                arr_id = int(_path.split("_")[-1].split(".")[0])
                arr = self.path2array(_path, img_size)
                data[arr_id] = arr
                progress(i + 1, IMAGE_SIZE)
            print("")

            with open(self.data_path + f"/image_data_{img_size}.plk",
                      "wb") as fp:
                pickle.dump(data, fp)

        return data
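
Every example on this page calls a progress helper that is defined elsewhere in its own project (Example 15 imports it as utils.progress). As a point of reference only, here is a minimal sketch of what such a console progress bar could look like, assuming the progress(count, total, suffix='') form used by most of the calls; a few examples pass different arguments (Example 2 passes four, Example 14 uses a status keyword), so treat this as an illustration rather than the actual helper.

import sys

def progress(count, total, suffix=''):
    # Minimal sketch of a text progress bar with an assumed signature;
    # not the exact helper used by the projects in these examples.
    bar_len = 40
    filled = int(round(bar_len * count / float(total)))
    bar = '=' * filled + '-' * (bar_len - filled)
    percent = round(100.0 * count / float(total), 1)
    sys.stdout.write('[%s] %s%% ...%s\r' % (bar, percent, suffix))
    sys.stdout.flush()
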
Example 2
def deep_train(data_iterator, model, epoch=0, steps=500):
    '''
    training for one epoch
    '''
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    print('Start training epoch %i.....' % epoch)
    losses = []
    for step in range(steps):
        start = time.time()
        optimizer.zero_grad()

        # get data
        X, y = next(data_iterator)
        X, y = tensor_convertor(X, y=y)
        assert X.shape==(data_iterator.batch_size,5, 400) or \
                X.shape==(data_iterator.batch_size + 1,5,400), \
                X.shape

        #get prediction (forward)
        pred_y = model(X)
        pred_y = pred_y.view(-1)
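        # NaN is never equal to itself, so this asserts that no prediction is NaN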
        assert sum(pred_y != pred_y).item() == 0, pred_y
        loss = F.binary_cross_entropy(pred_y, y)
        losses.append(loss.item())  # store the scalar so the computation graph is not kept alive

        # update gradient
        loss.backward()
        optimizer.step()
        end = time.time()

        status = '%i/%i step\tloss:%.3f\tused: %.3fs' \
                %(step, steps, loss.item(), end-start)
        progress(steps, step, epoch + 1, status)
    calculate_metrics(y, pred_y, epoch + 1, loss.item())
    return losses
Example 3
def send_requests(db, known_valid, test_case_1, test_case_2, missing_chars):
    """
    :param db: The database where samples are stored
    :param known_valid: The known valid characters
    :param test_case_1: One character to test (test case 1)
    :param test_case_2: One character to test (test case 2)
    :param missing_chars: The total number of chars of the API key
    :return: None. All is stored in the DB
    """
    session = requests.Session()

    http_adapter = requests.adapters.HTTPAdapter(max_retries=3)

    session.mount('http://', http_adapter)
    session.mount('https://', http_adapter)

    token_test_case_1 = generate_test_token(known_valid, test_case_1,
                                            missing_chars)

    token_test_case_2 = generate_test_token(known_valid, test_case_2,
                                            missing_chars)

    print('Collecting %s samples for:' % (NUM_SAMPLES * 2))
    print(' - %s' % token_test_case_1)
    print(' - %s' % token_test_case_2)
    print('')
    print('Test name: %s' % TEST_NAME)

    for i in xrange(NUM_SAMPLES):

        #
        # What I'm trying to do here is to get timings in pairs.
        # https://github.com/andresriancho/django-rest-framework-timing/issues/5
        #
        tmp_results = {}

        # Sending the HTTP requests in different order during sample capture is
        # something recommended by Paul McMillan and Sebastian Schinzel, they
        # recommend it because it might break some caches
        shuffled_token_tests = [(0, token_test_case_1), (1, token_test_case_2)]
        random.shuffle(shuffled_token_tests)

        for j, token in shuffled_token_tests:
            response, naive_time = send_with_naive_timing(session, URL, token)
            tmp_results[j] = (response, naive_time, token)

        data = {'test_name': TEST_NAME, 'capture_timestamp': time.time()}

        for j, (response, naive_time,
                token) in enumerate(tmp_results.values()):
            data.update({
                'x_runtime_%s' % j: response.headers['X-Runtime'],
                'userspace_rtt_microseconds_%s' % j: naive_time,
                'token_%s' % j: token
            })

        db.insert(data)

        if i % (NUM_SAMPLES / 1000) == 0:
            progress(i, NUM_SAMPLES)
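
The send_with_naive_timing helper used above is not shown in this listing. Judging only from how its return value is consumed (a response object plus a value stored as userspace_rtt_microseconds), a hypothetical version could look like the sketch below; the Authorization header format is an assumption borrowed from Example 9.

import time

def send_with_naive_timing(session, url, token):
    # Hypothetical helper: naively time one GET request in user space and
    # return the response together with the elapsed time in microseconds.
    # The Authorization header format is assumed, not taken from this code.
    start = time.time()
    response = session.get(url, headers={'Authorization': 'Token %s' % token})
    rtt_microseconds = (time.time() - start) * 1e6
    return response, rtt_microseconds
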
Example 4
    def index_blog(self):
        ghost = get_voyage_connection()
        with ghost.cursor() as ghost_cur:
            ghost_cur.execute("SELECT id, title, published_at, html "
                              "FROM posts WHERE status='published' "
                              "ORDER BY published_at DESC")
            posts = [{
                "post_id": str(post[0]),
                "title": post[1],
                "text": clean_string(post[3])
            } for post in ghost_cur.fetchall()]
            total_cnt = len(posts)
            print("Index %d posts" % total_cnt)
            for i, post in enumerate(posts):
                ghost_cur.execute(
                    "SELECT t.name FROM tags t "
                    "LEFT JOIN posts_tags pt ON pt.tag_id=t.id "
                    "WHERE pt.post_id=%s", str(post['post_id']))
                tags = [tag[0] for tag in ghost_cur.fetchall()]
                post["tags"] = ",".join(tags)
                suffix = post["title"][:25]
                progress(i, total_cnt, suffix)
                self.index_document(self.index_name, post)
        ghost.close()
        print("\nFinished indexing posts")
Example 5
    def load_images(self, is_train=True):
        if is_train:
            data_type = "train"
            data_len = train_image_size
        else:
            data_type = "val"
            data_len = val_image_size

        path = self.data_path + "/image"
        if not os.path.exists(path): os.mkdir(path)

        if os.path.exists(path +
                          f"/{data_type}2014_array/{self.image_size[0]}"):
            files = glob.glob(
                path + f"/{data_type}2014_array/{self.image_size[0]}/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
        else:
            url = f"http://images.cocodataset.org/zips/{data_type}2014.zip"
            file_name = url.split("/")[-1]
            if not os.path.exists(path + f"/{file_name}"):
                downloder(url, path + f"/{file_name}")

            if len(glob.glob(path + f"/{data_type}2014/*.jpg")) != data_len:
                print(f"Info:Extracting {data_type} image data from zip file")
                import zipfile
                with zipfile.ZipFile(path + f"/{file_name}") as zip_fp:
                    zip_fp.extractall(path)
            files = glob.glob(path + f"/{data_type}2014/*.jpg")

            path = path + f"/{data_type}2014_array"
            if not os.path.exists(path): os.mkdir(path)
            path = path + f"/{self.image_size[0]}"
            if not os.path.exists(path):
                os.mkdir(path)
                print(f"Info:Converting {data_type} image data path to ndarray")
                for i, _path in enumerate(files):
                    file_name = _path.split("/")[-1].split(".")[0]
                    arr = self.path2array(_path)
                    np.save(path + f"/{file_name}.npy", arr)
                    progress(i + 1, data_len)
                print("")

            files = glob.glob(path + "/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
Example 6
    def load_images(self, is_train=True):
        if is_train:
            data_type = "train"
            data_len = train_image_size
        else:
            data_type = "val"
            data_len = val_image_size

        if len(glob.glob(self.data_path +
                         f"/{data_type}2014_array/*.npy")) == data_len:
            files = glob.glob(self.data_path + f"/{data_type}2014_array/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
        else:
            url = f"http://images.cocodataset.org/zips/{data_type}2014.zip"
            file_name = url.split("/")[-1]
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            path = self.data_path + f"/{data_type}2014_array"
            if len(glob.glob(self.data_path +
                             f"/{data_type}2014/*.jpg")) != data_len:
                print(
                    f"Info:Extract {data_type} images from zip file and convert images to ndarray"
                )
                if not os.path.exists(path): os.mkdir(path)
                import zipfile
                with zipfile.ZipFile(self.data_path +
                                     f"/{file_name}") as zip_fp:
                    file_count = len(zip_fp.filelist)
                    for i, item in enumerate(zip_fp.filelist):
                        file_name = item.filename.split("/")[-1].split(".")[0]
                        zip_fp.extract(item, self.data_path)
                        if file_name != "":
                            arr = self.path2array(
                                self.data_path +
                                f"/{data_type}2014/{file_name}.jpg")
                            np.save(path + f"/{file_name}", arr)
                        progress(i + 1, file_count)
                    print("")

            files = glob.glob(path + "/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
Example 7
    def index_templates(self):
        posts = []
        total_cnt = len(self.statics_tpl)
        print("Index %d templates" % total_cnt)
        for page_key, page in self.statics_tpl.items():
            with open("templates/" + page['tpl_file']) as p:
                text = clean_string(p.read())
                posts.append({
                    "post_id": "static-" + page_key,
                    "title": page['title'],
                    "text": text
                })
        for i, post in enumerate(posts):
            suffix = post["title"][:25]
            progress(i, total_cnt, suffix)
            self.index_document(self.index_name, post)
        print("\nFinished indexing templates")
Example 8
    def load_depth(self, image_data):
        if os.path.exists(self.data_path + "/depth_data.plk"):
            with open(self.data_path + "/depth_data.plk", "rb") as fp:
                depth_data = pickle.load(fp)
        else:
            print("Info:convert image to depth")
            depth_data = dict()
            key_list = list(image_data.keys())
            for i, key in enumerate(key_list):
                image = image_data[key]
                depth_128 = self.image2depth(image["x_256"])
                depth_64 = np.resize(depth_128, (64, 64))
                depth_data[key] = {"x_128": depth_128, "x_64": depth_64}
                progress(i + 1, len(key_list))

            with open(self.data_path + "/depth_data.plk", "wb") as fp:
                pickle.dump(depth_data, fp)

        return depth_data
Example 9
def get_next_char_with_timing(current_token_chars):
    session = requests.Session()
    historic_charset_timing = {}

    for i in xrange(NUM_SAMPLES):

        current_charset_timing = {}

        shuffled_charset = CHARSET[:]
        random.shuffle(shuffled_charset)

        for current_char in shuffled_charset:
            test_token = current_token_chars + current_char + CHARSET[0] * (VALID_TOKEN_LEN - 1)

            response = session.get(URL, headers={'Authorization': 'Token %s' % test_token})
            current_charset_timing[current_char] = float(response.headers['X-Runtime'])

        progress(i, NUM_SAMPLES)

        ranked_charset_timing = rank_charset_timing(current_charset_timing)

        for ichar in ranked_charset_timing:
            if ichar in historic_charset_timing:
                historic_charset_timing[ichar].append(ranked_charset_timing[ichar])
            else:
                historic_charset_timing[ichar] = [ranked_charset_timing[ichar]]

    average_char_ranking = {}

    for ichar in historic_charset_timing:
        average_char_ranking[ichar] = numpy.mean(historic_charset_timing[ichar])

    avg_items = average_char_ranking.items()
    avg_items.sort(value_sort)

    found_char = avg_items[0][0]
    print('Found character "%s"' % found_char)
    pprint.pprint(avg_items)
    pprint.pprint(average_char_ranking)

    return found_char
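
The rank_charset_timing and value_sort helpers called above are not included in this listing. Given how the result is used (per-character values that are averaged over all samples, sorted, and the first entry reported as the found character), one plausible reading is that each character is assigned a rank with 0 for the slowest response, so the character whose comparison consistently takes longest ends up with the lowest average. The sketch below is an assumption about that missing helper, not its actual definition.

def rank_charset_timing(charset_timing):
    # Assumed implementation: rank 0 goes to the character with the highest
    # measured X-Runtime (the slowest comparison), rank 1 to the next, and
    # so on, so averaging ranks over many samples favours the slowest char.
    ordered = sorted(charset_timing, key=charset_timing.get, reverse=True)
    return {char: rank for rank, char in enumerate(ordered)}
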
Example 10
    def load_images(self):
        files_cont = len(glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy"))
        if files_cont == image_size:
            files = glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}
        else:
            url = "http://www.vision.caltech.edu.s3-us-west-2.amazonaws.com/visipedia-data/CUB-200-2011/CUB_200_2011.tgz"
            title = "CUB_200_2011.tgz"
            if not os.path.exists(self.data_path + f"/{title}"):downloder(url, self.data_path + f"/{title}")
            
            if len(glob.glob(self.data_path + "/image/*/*.jpg")) != image_size:
                print("Info:Extracting image data from tar file")
                import tarfile, shutil
                with tarfile.open(self.data_path + f"/{title}", 'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                shutil.move(self.data_path + "/CUB_200_2011/images", self.data_path)
                os.rename(self.data_path + "/images", self.data_path + "/image")

            files = glob.glob(self.data_path + "/image/*/*.jpg")
            path = self.data_path + "/array"
            if not os.path.exists(path):os.mkdir(path)
            path = path + f"/{self.image_size[0]}"
            if not os.path.exists(path):
                os.mkdir(path)
                print("Info:Converting image data path to ndarray")
                for i, _path in enumerate(files):
                    file_name = _path.split("/")[-1].split(".")[0]
                    arr = self.path2array(_path)
                    np.save(path + f"/{file_name}.npy", arr)
                    progress(i+1, image_size)
                print("")

            files = glob.glob(path + "/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}

        return data
Example 11
    def load_images(self):
        if os.path.exists(self.data_path + "/image_data.plk"):
            with open(self.data_path + "/image_data.plk", "rb") as fp:
                data = pickle.load(fp)
        else:
            file_name = "102flowers.tgz"
            url = "http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            if len(glob.glob(self.data_path + "/images/*.jpg")) != IMAGE_SIZE:
                print("Info:Extracting image data from tar file")
                import tarfile
                with tarfile.open(self.data_path + f"/{file_name}",
                                  'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                os.rename(self.data_path + "/jpg", self.data_path + "/images")

            data = {}
            files = glob.glob(self.data_path + "/images/*.jpg")
            print("Info:load image data")
            for i, _path in enumerate(files):
                arr_id = int(_path.split("_")[-1].split(".")[0])
                arr_256 = self.path2array(_path, 256)
                arr_128 = self.path2array(_path, 128)
                arr_64 = self.path2array(_path, 64)
                data[arr_id] = {
                    "x_256": arr_256,
                    "x_128": arr_128,
                    "x_64": arr_64
                }
                progress(i + 1, IMAGE_SIZE)
            print("")

            with open(self.data_path + "/image_data.plk", "wb") as fp:
                pickle.dump(data, fp)

        return data
Example 12
    def load_images(self):
        files_cont = len(glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy"))
        if files_cont == image_size:
            files = glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}
        else:
            url = "http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
            title = "102flowers.tgz"
            if not os.path.exists(self.data_path + f"/{title}"):downloder(url, self.data_path + f"/{title}")
            
            if len(glob.glob(self.data_path + "/image/*.jpg")) != image_size:
                print("Info:Extracting image data from tar file")
                import tarfile
                with tarfile.open(self.data_path + f"/{title}", 'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                os.rename(self.data_path + "/jpg", self.data_path + "/image")

            files = glob.glob(self.data_path + "/image/*.jpg")
            path = self.data_path + "/array"
            if not os.path.exists(path):os.mkdir(path)
            path = path + f"/{self.image_size[0]}"
            if not os.path.exists(path):
                os.mkdir(path)
                print("Info:Converting image data path to ndarray")
                for i, _path in enumerate(files):
                    file_name = _path.split("/")[-1].split(".")[0]
                    arr = self.path2array(_path)
                    np.save(path + f"/{file_name}.npy", arr)
                    progress(i+1, image_size)
                print("")

            files = glob.glob(path + "/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}

        return data
Example 13
        features['echoisms'], features['selfish'],#get_echoisms(lyric), get_selfish_degree(lyric),
        count_duplicate_lines(lyric), features['is_title_in_lyrics'],# (row['Song'], lyric),
        features['rhymes'],#get_rhymes(lyric),
        features['verb_tenses']['present'], features['verb_tenses']['past'], features['verb_tenses']['future'], #verb_freq['present'], verb_freq['past'], verb_freq['future'],
        freq['ADJ'], freq['ADP'], freq['ADV'], freq['AUX'], freq['CONJ'], 
        freq['CCONJ'], freq['DET'], freq['INTJ'], freq['NOUN'], freq['NUM'],
        freq['PART'], freq['PRON'], freq['PROPN'], freq['PUNCT'], freq['SCONJ'],
        freq['SYM'], freq['VERB'], freq['X'], freq['SPACE'],
        # Sentiment stuff
        sentiment[0], sentiment[1],
        song[2]
    )

    rows.append(elem)
    count += 1
    progress(count, total, '{}/{}'.format(count, total))

  df = pd.DataFrame(rows)
  df.columns = ['ARTIST', 'SONG_TITLE', 'LYRICS_VECTOR', 'TITLE_VECTOR', 
    'LINE_COUNT', 'WORD_COUNT', 'ECHOISMS', 'SELFISH_DEGREE', 
    'DUPLICATE_LINES', 'IS_TITLE_IN_LYRICS', 'RHYMES', 'VERB_PRESENT', 
    'VERB_PAST', 'VERB_FUTURE', 'ADJ_FREQUENCIES', 'CONJUCTION_FREQUENCIES', 
    'ADV_FREQUENCIES', 'AUX_FREQUENCIES', 'CONJ_FREQUENCIES', 'CCONJ_FREQUENCIES', 
    'DETERMINER_FREQUENCIES', 'INTERJECTION_FREQUENCIES', 'NOUN_FREQUENCIES', 
    'NUM_FREQUENCIES', 'PART_FREQUENCIES', 'PRON_FREQUENCIES', 'PROPN_FREQUENCIES', 
    'PUNCT_FREQUENCIES', 'SCONJ_FREQUENCIES', 'SYM_FREQUENCIES', 'VERB_FREQUENCIES',
    'X_FREQUENCIES', 'SPACE_FREQUENCIES', 
    'SENTIMENT', 'SUBJECTIVITY',
    'EMOTION'
  ]
Example 14
try:
    from instrumental.drivers.sourcemeasureunit.hp import HP_4156C
    SOURCEMETER = HP_4156C(visa_address='GPIB0::17::INSTR')
except Exception:
    print('no sourcemeter available. exiting.')
    exit()
else:
    print('HP opened')
    SOURCEMETER.set_channel(channel=2)

SOURCEMETER.set_current_compliance(Q_(100e-6, 'A'))
bring_to_breakdown(SOURCEMETER, Vbd)

for t in range(round(bias_settle_time)):
    progress(t + 1,
             round(bias_settle_time),
             status='Bias settle wait {}/{:g} sec'.format(
                 t + 1, bias_settle_time))
    time.sleep(1.0)

# Start measurements
timestamp = []
measurements = []
bias_current = []
maxlevel = []
minlevel = []
temperatures = []

print('\nPerforming measurement...')

for i in range(num_measures):
    progress(i + 1, num_measures, status='Running measurement')
Example 15
import song_featurize as sf
import utils.progress as progress

import pandas as pd

ML4Q_PATH = './datasets/silver_standard/songsDF.csv'

df = pd.read_csv(ML4Q_PATH)

X = list()
total = len(df)
i = 0
for idx, row in df.iterrows():
    progress.progress(i, total, 'Parsed songs')
    point = sf.featurize(row['PID'], row['Artist'], row['Title'])
    if point is not None:
        point.append(row['SeqID'])
        X.append(point)
    i += 1

df = pd.DataFrame(
    X,
    columns=[
        'PLAYLIST_PID', 'ARTIST', 'SONG_TITLE', 'LYRICS_VECTOR',
        'TITLE_VECTOR', 'LINE_COUNT', 'WORD_COUNT', 'ECHOISMS',
        'SELFISH_DEGREE', 'DUPLICATE_LINES', 'IS_TITLE_IN_LYRICS', 'RHYMES',
        'VERB_PRESENT', 'VERB_PAST', 'VERB_FUTURE', 'ADJ_FREQUENCIES',
        'CONJUCTION_FREQUENCIES', 'ADV_FREQUENCIES', 'AUX_FREQUENCIES',
        'CONJ_FREQUENCIES', 'CCONJ_FREQUENCIES', 'DETERMINER_FREQUENCIES',
        'INTERJECTION_FREQUENCIES', 'NOUN_FREQUENCIES', 'NUM_FREQUENCIES',
        'PART_FREQUENCIES', 'PRON_FREQUENCIES', 'PROPN_FREQUENCIES',
Example 16
def main():
    conf = get_args()
    dataset = MSCOCO(conf)
    VOC_SIZE = dataset.jp_voc_size if conf.use_lang == "jp" else dataset.en_voc_size
    SAMPLE_SIZE = conf.sample_size // conf.gpu_num if conf.gpu_num > 1 else conf.sample_size
    SEQ_LEN = conf.seq_len_jp if conf.use_lang == "jp" else conf.seq_len_en
    index2tok = dataset.jp_index2tok if conf.use_lang == "jp" else dataset.en_index2tok

    if not conf.silent:
        save_path = os.path.abspath(script_path + conf.save_path)
        if not os.path.exists(save_path): os.mkdir(save_path)
        save_path = os.path.abspath(save_path + f"/{conf.use_lang}")
        if not os.path.exists(save_path): os.mkdir(save_path)
        preview_path = os.path.abspath(save_path + "/preview")
        if not os.path.exists(preview_path): os.mkdir(preview_path)

    netG, netD = build_models(conf, VOC_SIZE, SEQ_LEN)
    optimizerG, optimizerD = build_optimizer(netG, netD, conf.adam_lr,
                                             conf.adam_beta1, conf.adam_beta2)
    pprog = print_progress(conf.pre_gen_max_epoch, conf.batch_size,
                           dataset.train_data_len)

    updater = Updater(netG, netD, optimizerG, optimizerD, conf)

    def pretrain_generator():
        print("==========================================")
        print("Info:start generator pre train")
        pre_gen_loss_hist = np.zeros((1, conf.pre_gen_max_epoch),
                                     dtype="float32")
        for i in range(conf.pre_gen_max_epoch):
            count = 0
            total_loss = 0
            start = time.time()
            for data in dataset.get_data():
                data = toGPU(data, conf.gpu_num)
                loss = updater.update_pre_gen(data)

                total_loss += loss.data.cpu().numpy()

                count += 1
                if dataset.now_iter % conf.display_interval == 0:
                    elapsed = time.time() - start
                    pprog(elapsed, dataset.get_state)
                    start = time.time()

            pre_gen_loss_hist[0, i] = total_loss / count

        if not conf.silent:
            data = dataset.sample(conf.sample_size)
            sample_generate(netG, data, SAMPLE_SIZE, index2tok, conf.gpu_num,\
                        conf.noise_dim, preview_path + f"/sample_text_pretrain.txt")
            np.save(save_path + "/pre_gen_loss_hist", pre_gen_loss_hist)
            torch.save(netG.state_dict(), save_path + "/pretrain_gen_params")
        print("\n\n\n\n==========================================")

    def pretrain_discriminator():
        print("==========================================")
        print("Info:start discriminator pre train")
        dataset.clear_state()
        pprog.max_iter = conf.pre_dis_max_epoch
        pre_dis_hist = np.zeros((4, conf.pre_dis_max_epoch), dtype="float32")
        for i in range(conf.pre_dis_max_epoch):
            count = 0
            total_loss = 0
            total_real_acc = 0
            total_fake_acc = 0
            total_wrong_acc = 0
            start = time.time()
            for data in dataset.get_data():
                data = toGPU(data, conf.gpu_num)
                loss, real_acc, fake_acc, wrong_acc = updater.update_dis(data)

                total_loss += loss.data.cpu().numpy()
                total_real_acc += real_acc.data.cpu().numpy()
                total_fake_acc += fake_acc.data.cpu().numpy()
                total_wrong_acc += wrong_acc.data.cpu().numpy()

                count += 1
                if dataset.now_iter % conf.display_interval == 0:
                    elapsed = time.time() - start
                    pprog(elapsed, dataset.get_state)
                    start = time.time()

            pre_dis_hist[0, i] = total_loss / count
            pre_dis_hist[1, i] = total_real_acc / count
            pre_dis_hist[2, i] = total_fake_acc / count
            pre_dis_hist[3, i] = total_wrong_acc / count

        if not conf.silent:
            np.save(save_path + "/pre_dis_hist", pre_dis_hist)
            torch.save(netD.state_dict(), save_path + "/pretrain_dis_params")
        print("\n\n\n\n==========================================")

    if os.path.exists(save_path + "/pretrain_gen_params"):
        netG.load_state_dict(torch.load(save_path + "/pretrain_gen_params"))
    else:
        pretrain_generator()

    if os.path.exists(save_path + "/pretrain_dis_params"):
        netD.load_state_dict(torch.load(save_path + "/pretrain_dis_params"))
    else:
        pretrain_discriminator()

    print("==========================================")
    print("Info:start main train")
    dataset.clear_state()
    pprog.max_iter = conf.max_epoch
    train_loss_hist = np.zeros((5, conf.max_epoch), dtype="float32")
    val_loss_hist = np.zeros((5, conf.max_epoch), dtype="float32")
    val_count = dataset.val_data_len // conf.batch_size
    if dataset.val_data_len % conf.batch_size != 0: val_count += 1
    for i in range(conf.max_epoch):
        #train loop
        count = 1
        total_g_loss = 0
        total_d_loss = 0
        total_real_acc = 0
        total_fake_acc = 0
        total_wrong_acc = 0
        start = time.time()

        for p in netG.parameters():
            p.requires_grad = True
        for p in netD.parameters():
            p.requires_grad = True
        for data in dataset.get_data():
            data = toGPU(data, conf.gpu_num)

            if count % conf.n_dis == 0:
                loss = updater.update_PG(data)
                total_g_loss += loss.data.cpu().numpy()

            loss, real_acc, fake_acc, wrong_acc = updater.update_dis(data)

            total_d_loss += loss.data.cpu().numpy()
            total_real_acc += real_acc.data.cpu().numpy()
            total_fake_acc += fake_acc.data.cpu().numpy()
            total_wrong_acc += wrong_acc.data.cpu().numpy()

            count += 1
            if dataset.now_iter % conf.display_interval == 0:
                elapsed = time.time() - start
                pprog(elapsed, dataset.get_state)
                start = time.time()

        train_loss_hist[0, i] = total_d_loss / count
        train_loss_hist[1, i] = total_real_acc / count
        train_loss_hist[2, i] = total_fake_acc / count
        train_loss_hist[3, i] = total_wrong_acc / count
        train_loss_hist[4, i] = total_g_loss / (count // 5)
        print("\n\n\n")

        #val loop
        print(f"Validation {i+1} / {conf.max_epoch}")
        count = 0
        total_g_loss = 0
        total_d_loss = 0
        total_real_acc = 0
        total_fake_acc = 0
        total_wrong_acc = 0
        start = time.time()
        for p in netG.parameters():
            p.requires_grad = False
        for p in netD.parameters():
            p.requires_grad = False
        for data in dataset.get_data(is_val=True):
            data = toGPU(data, conf.gpu_num)

            g_loss, d_loss, real_acc, fake_acc, wrong_acc = updater.evaluate(
                data)

            total_g_loss += g_loss.data.cpu().numpy()
            total_d_loss += d_loss.data.cpu().numpy()
            total_real_acc += real_acc.data.cpu().numpy()
            total_fake_acc += fake_acc.data.cpu().numpy()
            total_wrong_acc += wrong_acc.data.cpu().numpy()

            count += 1
            if dataset.now_iter % conf.display_interval == 0:
                elapsed = time.time() - start
                progress(count + 1, val_count, elapsed)

        progress(count, val_count, elapsed)
        val_loss_hist[0, i] = total_d_loss / count
        val_loss_hist[1, i] = total_real_acc / count
        val_loss_hist[2, i] = total_fake_acc / count
        val_loss_hist[3, i] = total_wrong_acc / count
        val_loss_hist[4, i] = total_g_loss / (count // 5)
        print("\u001B[5A", end="")

        if (i + 1) % conf.snapshot_interval == 0 and not conf.silent:
            data = dataset.sample(conf.sample_size)
            sample_generate(netG, data, SAMPLE_SIZE, index2tok, conf.gpu_num,\
                        conf.noise_dim, preview_path + f"/sample_text_{i+1:04d}.txt")
            np.save(save_path + "/train_loss_hist", train_loss_hist)
            np.save(save_path + "/val_loss_hist", val_loss_hist)
            torch.save(netG.state_dict(),
                       save_path + f"/gen_params_{i+1:04d}.pth")
            torch.save(netD.state_dict(),
                       save_path + f"/dis_params_{i+1:04d}.pth")

    if not conf.silent:
        np.save(save_path + "/train_loss_hist", train_loss_hist)
        np.save(save_path + "/val_loss_hist", val_loss_hist)
        data = dataset.sample(conf.sample_size)
        sample_generate(netG, data, SAMPLE_SIZE, index2tok, conf.gpu_num,\
                    conf.noise_dim, preview_path + "/sample_text.txt")
        torch.save(netG.state_dict(), save_path + "/gen_params.pth")
        torch.save(netD.state_dict(), save_path + "/dis_params.pth")
    print("\n\n\n\n==========================================")
    print("Info:finish train")
Example 17
def send_requests(db, known_valid, test_case_1, test_case_2, missing_chars):
    """
    :param db: The database where samples are stored
    :param known_valid: The known valid characters
    :param test_case_1: One character to test (test case 1)
    :param test_case_2: One character to test (test case 2)
    :param missing_chars: The total number of chars of the API key
    :return: None. All is stored in the DB
    """
    session = requests.Session()

    http_adapter = requests.adapters.HTTPAdapter(max_retries=3)

    session.mount('http://', http_adapter)
    session.mount('https://', http_adapter)

    token_test_case_1 = generate_test_token(known_valid,
                                            test_case_1,
                                            missing_chars)

    token_test_case_2 = generate_test_token(known_valid,
                                            test_case_2,
                                            missing_chars)

    print('Collecting %s samples for:' % (NUM_SAMPLES * 2))
    print(' - %s' % token_test_case_1)
    print(' - %s' % token_test_case_2)
    print('')
    print('Test name: %s' % TEST_NAME)

    for i in xrange(NUM_SAMPLES):

        #
        # What I'm trying to do here is to get timings in pairs.
        # https://github.com/andresriancho/django-rest-framework-timing/issues/5
        #
        tmp_results = {}

        # Sending the HTTP requests in different order during sample capture is
        # something recommended by Paul McMillan and Sebastian Schinzel, they
        # recommend it because it might break some caches
        shuffled_token_tests = [(0, token_test_case_1),
                                (1, token_test_case_2)]
        random.shuffle(shuffled_token_tests)

        for j, token in shuffled_token_tests:
            response, naive_time = send_with_naive_timing(session, URL, token)
            tmp_results[j] = (response, naive_time, token)

        data = {'test_name': TEST_NAME,
                'capture_timestamp': time.time()}

        for j, (response, naive_time, token) in enumerate(tmp_results.values()):
            data.update({'x_runtime_%s' % j: response.headers['X-Runtime'],
                         'userspace_rtt_microseconds_%s' % j: naive_time,
                         'token_%s' % j: token})

        db.insert(data)

        if i % (NUM_SAMPLES / 1000) == 0:
            progress(i, NUM_SAMPLES)
Example 18
    log.write('\n')

def download_lyric(song):
  try:
    lyric = lyricwikia.get_lyrics(song['Artist'], song['Title'])
    filename = '_'.join([song['Mood'], song['Artist'], song['Title']])
    filename = filename.replace('/', '-') # The '/' should never appear
    with open(os.path.join(args.output, filename), 'w') as sfile:
      sfile.write(lyric)
      return True
  except lyricwikia.LyricsNotFound:
    err('Could not download {}: {}, {}'.format(song['Index'], song['Artist'], song['Title']))
    return False

if __name__ == '__main__':
  # Get the number of songs we are going to download
  totalTitles = songs_count(args.input)

  # Create output directory
  create_output_dir(args.output)

  # Download songs
  count = 0
  errCount = 0
  for lyric in lyric_entries_generator(args.input):
    progress(count, totalTitles, 'Errors encountered: {}'.format(errCount))
    if not download_lyric(lyric):
      errCount += 1
    count += 1