Example #1
0
def main():
    """Entry point: download the raw JSON payload, clean it, write it as CSV."""
    args = get_args()
    config = utils.get_config(args.config)
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    session = http_session.StorageSession(
        **config['session'], access_key=utils.get_access_token())

    root_dir = config['data']['root_dir']

    # Fetch the raw payload for the requested date.
    source_path = utils.build_path(root_dir=root_dir,
                                   sub_dir=args.raw,
                                   date=args.date,
                                   ext='json')
    payload = download_data(session, path=source_path)

    parsed_rows = parse_data(payload)
    LOGGER.info("Retrieved %s rows", len(parsed_rows))

    # Clean the rows according to the configured field types.
    field_headers = utils.get_headers(config['fields'])
    parsed_rows = transform.clean(parsed_rows,
                                  data_types=field_headers,
                                  date=args.date)

    # Serialise the cleaned rows.
    target_path = utils.build_path(root_dir=root_dir,
                                   sub_dir=args.output,
                                   date=args.date,
                                   ext='csv')
    utils.write_csv(path=target_path, rows=parsed_rows, header=args.header)
 def __init__(self,
              name=None,
              key_resource_id=None,
              extra_resources=None,
              path=None,
              lang="en"):
     """Collect a lesson resource and prepare its output location.

     :param name: human-readable lesson name; used for the title and to
         derive a stable filename (sha1 hex digest of the name).
     :param key_resource_id: resource identifier, joined onto BASE_URL.
     :param extra_resources: optional collection of additional resources.
     :param path: list of directory levels placed under DATA_DIR; a
         trailing ``None`` level is dropped when building the base path.
     :param lang: language code (default ``"en"``).
     """
     # NOTE(review): name and key_resource_id default to None but are
     # dereferenced immediately below -- confirm callers always pass them.
     self.key_resource_id = urljoin(BASE_URL, key_resource_id.strip())
     # Filesystem-safe, stable filename derived from the lesson name.
     self.filename = hashlib.sha1(name.encode("utf-8")).hexdigest()
     # Titles are capped at 80 characters.
     self.title = name if len(name) < 80 else name[:80]
     self.path_levels = path
     self.lang = lang
     self.file = None
     self.video = None
     self.ids = set([])
     LOGGER.info("Collecting: {}".format(self.key_resource_id))
     LOGGER.info("   - Name: {}".format(self.title))
     LOGGER.info("   - Lang: {}".format(self.lang))
     self.html = HTMLLesson(source_id=self.key_resource_id,
                            name=self.title,
                            lang=self.lang)
     # A trailing None level means "no extra sub-directory": drop it.
     if self.path_levels[-1] is None:
         self.base_path = build_path([DATA_DIR] + self.path_levels[:-1] +
                                     [self.filename])
     else:
         self.base_path = build_path([DATA_DIR] + self.path_levels +
                                     [self.filename])
     if extra_resources is not None:
         LOGGER.info("   - Extra resources: {}".format(
             len(extra_resources)))
         self.set_extra_resources(extra_resources)
 def __init__(self):
     """Create the trees data directory and compute per-stage output paths."""
     trees_dir = TESSIndiaChef.TREES_DATA_DIR
     build_path([trees_dir])
     self.scrape_stage = os.path.join(
         trees_dir, TESSIndiaChef.SCRAPING_STAGE_OUTPUT_TPL)
     self.crawling_stage = os.path.join(
         trees_dir, TESSIndiaChef.CRAWLING_STAGE_OUTPUT_TPL)
     super(TESSIndiaChef, self).__init__()
Example #4
0
 def pre_run(self, args, options):
     """Prepare output locations, then scrape and persist the channel tree."""
     build_path([FolkDCChef.TREES_DATA_DIR])
     self.download_css_js()
     # Language comes from the CLI; fall back to English.
     self.lang = options.get('--lang', "en")
     tree_filename = FolkDCChef.SCRAPING_STAGE_OUTPUT_TPL.format(
         lang=self.lang)
     self.RICECOOKER_JSON_TREE = tree_filename
     self.scrape_stage = os.path.join(FolkDCChef.TREES_DATA_DIR,
                                      tree_filename)
     self.write_tree_to_json(self.scrape(args, options))
Example #5
0
    def scrape(self, args, options):
        """Build and return the channel tree from resources.json.

        Recognised *options*:
            ``--download-video``: "0" disables video downloads (sets the
                module-level DOWNLOAD_VIDEOS flag).
            ``--load-video-list``: "1" loads a previously saved video
                list (sets the module-level LOAD_VIDEO_LIST flag).
        """
        download_video = options.get('--download-video', "1")
        load_video_list = options.get('--load-video-list', "0")

        if int(download_video) == 0:
            global DOWNLOAD_VIDEOS
            DOWNLOAD_VIDEOS = False

        if int(load_video_list) == 1:
            global LOAD_VIDEO_LIST
            LOAD_VIDEO_LIST = True

        global CHANNEL_SOURCE_ID
        self.RICECOOKER_JSON_TREE = 'ricecooker_json_tree.json'
        channel_tree = dict(
            source_domain=CHANNEL_DOMAIN,
            source_id=CHANNEL_SOURCE_ID,
            title=CHANNEL_NAME,
            description=
            CHANNEL_DESCRIPTION[:400],  # channel descriptions are capped at 400 characters
            thumbnail=CHANNEL_THUMBNAIL,
            author=AUTHOR,
            language=CHANNEL_LANGUAGE,
            children=[],
            license=LICENSE,
        )

        grades = GradeJsonTree(subject_node=SubjectNode)
        grades.load("resources.json",
                    auto_parse=True,
                    author=AUTHOR,
                    license=LICENSE,
                    save_url_to=build_path([DATA_DIR, CHANNEL_SOURCE_ID]),
                    load_video_list=load_video_list)

        base_path = [DATA_DIR]
        base_path = build_path(base_path)

        # Walk grade -> subject -> lesson, downloading each lesson's video
        # and wiring the nodes together bottom-up.
        for grade in grades:
            for subject in grade.subjects:
                for lesson in subject.lessons:
                    video = lesson.download(download=DOWNLOAD_VIDEOS,
                                            base_path=base_path)
                    lesson.add_node(video)
                    subject.add_node(lesson)
                grade.add_node(subject)
            channel_tree["children"].append(grade.to_dict())
        return channel_tree
    def scrape(self, args, options):
        """Build the channel tree with grammar and vocabulary units appended."""
        if int(options.get('--download-video', "1")) == 0:
            global DOWNLOAD_VIDEOS
            DOWNLOAD_VIDEOS = False

        if int(options.get('--load-video-list', "0")) == 1:
            global LOAD_VIDEO_LIST
            LOAD_VIDEO_LIST = True

        channel_tree = self.lessons()

        dest = build_path([DATA_DIR] + ["data"])

        # Vocabulary units are generated first, but appended after grammar
        # so the grammar section appears first in the channel.
        vocabulary = VocabularyConversationalEnglish()
        for unit in vocabulary.auto_generate_units(BASE_URL, dest):
            vocabulary.add_node(unit)

        grammar = EnglishGrammar()
        for unit in grammar.auto_generate_units(dest):
            grammar.add_node(unit)

        channel_tree["children"].append(grammar.to_node())
        channel_tree["children"].append(vocabulary.to_node())

        return channel_tree
Example #7
0
    def download(self, download=True, base_path=None):
        """Download this YouTube video, retrying up to 4 times.

        Skips non-watch URLs and user/channel pages, or when *download*
        is False.  On success sets ``self.filepath``/``self.filename``;
        an empty downloaded file resets ``self.filepath`` to None.
        """
        if ("watch?" not in self.source_id or "/user/" in self.source_id
                or download is False):
            return

        download_to = build_path([base_path, 'videos'])
        for _attempt in range(4):
            try:
                info = self.get_video_info(download_to=download_to, subtitles=False)
                if info is not None:
                    LOGGER.info("    + Video resolution: {}x{}".format(info.get("width", ""), info.get("height", "")))
                    self.filepath = os.path.join(download_to, "{}.mp4".format(info["id"]))
                    self.filename = info["title"]
                    # A zero-byte file is a failed download: discard it.
                    if self.filepath is not None and os.stat(self.filepath).st_size == 0:
                        LOGGER.info("    + Empty file")
                        self.filepath = None
            except (ValueError, IOError, OSError, URLError, ConnectionResetError) as e:
                # Transient errors: log, back off briefly and retry.
                LOGGER.info(e)
                LOGGER.info("Download retry")
                time.sleep(.8)
            except (youtube_dl.utils.DownloadError, youtube_dl.utils.ContentTooShortError,
                    youtube_dl.utils.ExtractorError) as e:
                # Extraction/download failures are permanent: give up.
                # (The previous duplicate OSError in this tuple and the
                # trailing `except OSError:` were unreachable -- OSError is
                # already caught by the retry handler above -- and have
                # been removed.)
                LOGGER.info("     + An error ocurred, may be the video is not available.")
                return
            else:
                return
Example #8
0
def process(cmd, file_name, content):
    """Dispatch an incoming command to its handler and log the outcome.

    cmd: command name (looked up in COMMANDS, falling back to default_cmd)
    file_name: file name to be processed by the command
    content: file content, only meaningful for the create command
    """
    logger.info('command: {}, file_name: {}, content: {}'
                .format(cmd, file_name, content))
    try:
        utils.check_working_dir()
        handler = COMMANDS.get(cmd, default_cmd)
        target = utils.build_path(file_name)
        # The create command takes the content as a second argument.
        result = handler(target, content) if content else handler(target)
        if result:
            logger.info('result = {}'.format(result))
    except CreateFileException as e:
        logger.error('CreateFileException: {}'.format(e.message))
    except ReadFileException as e:
        logger.error('ReadFileException: {}'.format(e.message))
    except RemoveFileException as e:
        logger.error('RemoveFileException: {}'.format(e.message))
    except GetMetaDataException as e:
        logger.error('GetMetaDataException: {}'.format(e.message))
Example #9
0
    def scrape(self, args, options):
        """Build the channel tree by walking subjects -> topics -> units.

        Recognised *options*:
            ``--download-video``: "0" disables video downloads.
            ``--basic-lessons``: "1" selects the K-12 lesson set.
            ``--intermedian-lessons``: "1" selects the intermediate set.
            ``--load-video-list``: "1" loads a previously saved video list.
        """
        download_video = options.get('--download-video', "1")
        basic_lessons = int(options.get('--basic-lessons', "0"))
        intermedian_lessons = int(options.get('--intermedian-lessons', "0"))
        load_video_list = options.get('--load-video-list', "0")

        if int(download_video) == 0:
            global DOWNLOAD_VIDEOS
            DOWNLOAD_VIDEOS = False

        if int(load_video_list) == 1:
            global LOAD_VIDEO_LIST
            LOAD_VIDEO_LIST = True

        # channel_tree is deliberately module-global: other helpers in this
        # module read it.
        global channel_tree
        # NOTE(review): the else branch duplicates the basic_lessons branch,
        # so k12_lessons() is the effective default -- confirm intended.
        if basic_lessons == 1:
            channel_tree, subjects = self.k12_lessons()
        elif intermedian_lessons == 1:
            channel_tree, subjects = self.intermediate_lessons()
        else:
            channel_tree, subjects = self.k12_lessons()

        base_path = [DATA_DIR] + ["King Khaled University in Abha"]
        base_path = build_path(base_path)

        # Download each unit's media and assemble the node tree bottom-up.
        for subject in subjects:
            for topic in subject.topics:
                for unit in topic.units:
                    unit.download(download=DOWNLOAD_VIDEOS,
                                  base_path=base_path)
                    topic.add_node(unit)
                subject.add_node(topic)
            channel_tree["children"].append(subject.to_node())

        return channel_tree
 def write_videos(self, from_i=0, to_i=None):
     """Yield one node per section in [from_i, to_i), downloading videos first."""
     dest = build_path([DATA_DIR] + ["abdullah_videos"])
     for section in self.get_sections(from_i=from_i, to_i=to_i):
         LOGGER.info("* Section: {}".format(section.title))
         section.download(download=DOWNLOAD_VIDEOS, base_path=dest)
         yield section.to_node()
Example #11
0
def newProject(args=None):
    '''Create the next numbered project under *project_base_path*.

    Increments the project counter, creates (or reuses) the project
    directory, initialises it via pyarmor, records it in the project
    index and returns a dict with the new project and a status message.

    >>> p = newProject()
    >>> p['message']
    'Project has been created'
    '''
    filename = _check_project_index()
    with open(filename, 'r') as fp:
        pindexes = json.load(fp)

    counter = pindexes['counter'] + 1
    name = 'project-%d' % counter
    path = os.path.join(project_base_path, name)
    if os.path.exists(path):
        # Fixed log wording (was the ungrammatical "has been exists").
        logging.warning('Project path %s already exists', path)
    else:
        logging.info('Make project path %s', path)
        os.mkdir(path)

    # NOTE(review): the *args* parameter is never used; the original code
    # shadowed it with this local list.  Kept in the signature for
    # backward compatibility.
    init_args = ['init', '--src', path, path]
    _pyarmor(init_args)

    pindexes['projects'][name] = os.path.abspath(path)
    pindexes['counter'] = counter
    with open(filename, 'w') as fp:
        json.dump(pindexes, fp)

    project = Project()
    project.open(path)

    project['name'] = name
    project['title'] = name
    project['output'] = build_path('dist', path)

    return dict(project=project, message='Project has been created')
Example #12
0
    def scrape(self, args, options):
        """Build the FolkDC channel tree for the configured language.

        With ``--test`` set, runs the test harness on the tree instead of
        scraping real resources.
        """
        run_test = bool(int(options.get('--test', "0")))

        global channel_tree
        # Channel descriptions are capped at 400 characters.
        description = """Digital Children's Folksongs for Language and Cultural Learning: a collection of multi-language folk songs and activities for primary students to learn languages, engage in collaboration and critical thinking, and develop intercultural skills. Contains folk songs, activity suggestions, and teacher training materials."""
        channel_tree = dict(
            source_domain=FolkDCChef.BASE_URL,
            source_id=CHANNEL_SOURCE_ID + "-" + self.lang,
            title="{} ({})".format(CHANNEL_NAME, self.lang),
            description=description[:400],
            thumbnail=CHANNEL_THUMBNAIL,
            author=AUTHOR,
            language=self.lang,
            children=[],
            license=LICENSE,
        )

        if run_test is True:
            return test(channel_tree)

        resources = Resource(lang=self.lang)
        resources.load("resources.json")
        for resource in resources:
            target = build_path(
                [DATA_DIR, resource.lang, resource.cls_name()])
            resource.to_file(target)
            node = resource.to_dict()
            if node is not None:
                channel_tree["children"].append(node)
        return channel_tree
Example #13
0
 def build_pdfs_nodes(self, urls, base_path):
     """Download each PDF url into <base_path>/pdfs and return the File nodes."""
     pdfs_dir = build_path([base_path, 'pdfs'])
     nodes = []
     for url in urls:
         pdf = File(source_id=url, lang=self.lang, title=self.title)
         pdf.download(download=DOWNLOAD_FILES, base_path=pdfs_dir)
         nodes.append(pdf)
     return nodes
Example #14
0
def _generate_resource_config(api_info, tag_info, custom_configs):
    """Render the resource.mustache template for one resource.

    :param api_info: mapping of operation name ("create", "update", "get",
        "list") to its API configuration.
    :param tag_info: tag metadata; supplies the default resource name and
        description.
    :param custom_configs: optional overrides (e.g. ``resource_name``).
    :returns: the rendered template string.
    :raises Exception: when the resource name is non-ASCII (Chinese tag
        without an English override) or a list operation has no identity.
    """
    # Collect per-operation message prefixes, skipping empty ones.
    msg_prefix = {}
    for i in ["create", "update", "get"]:
        s = api_info.get(i, {}).get("msg_prefix", None)
        if s:
            msg_prefix[i] = s

    create_api = api_info["create"]["api"]
    rn = tag_info["name"]
    if custom_configs:
        rn = custom_configs.get("resource_name", rn)
    # NOTE: `unicode` implies Python 2 -- a Chinese tag name must be
    # overridden with an English resource_name in custom_configs.
    if isinstance(rn, unicode):
        # Fixed typo in the message (was "resouce_name").
        raise Exception("Must config resource_name in English, "
                        "because the tag is Chinese")

    data = {
        "name": rn[0].upper() + rn[1:].lower(),
        "service_type": create_api["service_type"],
        "base_url": build_path(create_api["path"]),
        "msg_prefix": msg_prefix,
        "description": tag_info.get("description", ""),
        "create_verb": api_info["create"]["create_verb"],
    }

    if "update" in api_info:
        data["update_verb"] = build_path(api_info["update"]["update_verb"])

    if "list" in api_info:
        info = api_info["list"]
        if "identity" not in info:
            raise Exception("Must config identity for list operation")

        api = info["api"]
        v = {
            "path": build_path(api["path"]),
            "identity": [{
                "name": i
            } for i in info["identity"]]
        }
        v["query_params"] = [{"name": i["name"]} for i in api["query_params"]]
        if "msg_prefix" in info:
            v["msg_prefix"] = info["msg_prefix"]
        data["list_info"] = v

    return pystache.Renderer().render_path("template/resource.mustache", data)
 def articles(self):
     """Yield a node for each article row on the topic page."""
     for row in self.soup.find_all("div", class_="views-row"):
         title_tag = row.find("div", class_="views-field-title")
         link = urljoin(BASE_URL, title_tag.find("a").get("href"))
         article = Article(title_tag.text, link)
         article.thumbnail = row.find("img").get("src")
         dest = build_path([DATA_DIR, self.topic.title, article.title])
         if article.to_file(dest) is True:
             yield article.to_node()
Example #16
0
def store_raw_data(sampling_feature, date, directory, data):
    """Serialise *data* to an XML file named after the sampling feature."""
    # The suffix is the last path segment of the sampling feature URI.
    suffix = sampling_feature.rpartition('/')[2]
    target = utils.build_path(date=date, ext='xml',
                              directory=directory, suffix=suffix)

    with open(target, 'w') as output:
        output.write(data)
        LOGGER.info("Wrote '%s'", output.name)
Example #17
0
 def build_pdfs_nodes(self, base_path, content):
     """Yield a downloaded File node for every PDF url found in *content*."""
     dest = build_path([base_path, 'pdfs'])
     for url in self.get_pdfs_urls(content):
         pdf = File(source_id=url, lang=self.lang, title=self.title)
         pdf.download(download=DOWNLOAD_FILES, base_path=dest)
         yield pdf
Example #18
0
 def build_audio_nodes(self, base_path, content):
     """Yield a downloaded Audio node for every audio url found in *content*."""
     dest = build_path([base_path, 'audio'])
     for url in self.get_audio_urls(content):
         audio = Audio(source_id=url, lang=self.lang, title=self.title)
         audio.download(download=DOWNLOAD_AUDIO, base_path=dest)
         yield audio
Example #19
0
 def build_video_nodes(self, base_path, content):
     """Yield a downloaded YouTube node for each video url in *content*.

     Channel URLs are skipped; only individual YouTube videos are handled.
     """
     videos_url = self.get_videos_urls(content)
     # NOTE(review): *base_path* is ignored here -- videos always land in
     # DATA_DIR (a sibling implementation uses [DATA_DIR, "videos"]).
     # Confirm whether this is intentional before changing it.
     base_path = build_path([DATA_DIR])
     # (Removed dead local: a `video_nodes = []` list was created but
     # never used -- this is a generator, nodes are yielded directly.)
     for video_url in videos_url:
         if YouTubeResource.is_youtube(
                 video_url) and not YouTubeResource.is_channel(video_url):
             video = YouTubeResourceNode(video_url, lang=self.lang)
             video.download(download=DOWNLOAD_VIDEOS, base_path=base_path)
             yield video
 def build_pdfs_nodes(self, base_path, content):
     """Return nodes for every PDF url in *content*, skipping failed downloads."""
     dest = build_path([base_path, 'pdfs'])
     nodes = []
     for url in self.get_pdfs_urls(content):
         pdf = File(url, lang=self.lang, name=self.title)
         pdf.download(download=DOWNLOAD_FILES, base_path=dest)
         node = pdf.to_node()
         if node is not None:
             nodes.append(node)
     return nodes
Example #21
0
def _build(args):
    '''Build project: obfuscate all files in the project and/or make the
    runtime files, then update any entry scripts.'''
    project = Project()
    project.open(args.project)
    logging.info('Build project %s ...', args.project)
    capsule = build_path(project.capsule, args.project)
    # Resolve the output directory up front: it is needed by the runtime
    # and entry-script steps even when --only-runtime skips the
    # obfuscation branch (previously `output` was only assigned inside
    # that branch, raising NameError with --only-runtime).
    output = project.output

    if not args.only_runtime:
        mode = project.get_obfuscate_mode()
        files = project.get_build_files(args.force)
        src = project.src
        filepairs = [(os.path.join(src, x), os.path.join(output, x))
                     for x in files]

        logging.info('%s increment build',
                     'Disable' if args.force else 'Enable')
        logging.info('Search scripts from %s', src)
        logging.info('Obfuscate %d scripts with mode %s', len(files), mode)
        for x in files:
            logging.info('\t%s', x)
        logging.info('Save obfuscated scripts to %s', output)

        obfuscate_scripts(filepairs, mode, capsule, output)

        project['build_time'] = time.time()
        project.save(args.project)

    if not args.no_runtime:
        logging.info('Make runtime files')
        make_runtime(capsule, output)

    if project.entry:
        for x in project.entry.split(','):
            filename = os.path.join(output, x.strip())
            logging.info('Update entry script %s', filename)
            make_entry(filename, project.runtime_path)
    else:
        logging.info('\tIn order to import obfuscated scripts, insert ')
        logging.info('\t2 lines in entry script:')
        # Fixed module-name typo in the user-facing hint
        # (was "pytransfrom"; the runtime module is pytransform).
        logging.info('\t\tfrom pytransform import pyarmor_runtime')
        logging.info('\t\tpyarmor_runtime()')

    logging.info('Build project OK.')
 def build_video_nodes(self, base_path, content):
     """Return nodes for every YouTube video url found in *content*."""
     # NOTE(review): *base_path* is unused; videos always go under
     # DATA_DIR/videos.
     dest = build_path([DATA_DIR, "videos"])
     nodes = []
     for url in self.get_videos_urls(content):
         if YouTubeResource.is_youtube(url):
             video = YouTubeResource(url, lang=self.lang)
             video.download(download=DOWNLOAD_VIDEOS, base_path=dest)
             node = video.to_node()
             if node is not None:
                 nodes.append(node)
     return nodes
Example #23
0
 def test_build_path_with_lines(self):
     """Check build_path output with the lines flag; print and return 1/0."""
     fun_name = 'test_build_path_with_lines'
     expected = ('HP' + str(self.valid_booknum) + '/hp' +
                 str(self.valid_booknum) + '_' + str(self.valid_pagenum) +
                 '_lines.png')
     try:
         assert expected == utils.build_path(self.valid_booknum,
                                             self.valid_pagenum, True)
     except AssertionError:
         print(fun_name + ' ' + FAILED)
         return 0
     else:
         print(fun_name + ' ' + SUCCESS)
         return 1
    def playlist_name_links(self):
        """Return (title, url) pairs for the playlist, cached as JSON on disk."""
        digest = hashlib.sha1(self.source_id.encode("utf-8")).hexdigest()
        cache_dir = build_path([DATA_DIR, CHANNEL_SOURCE_ID])
        cache_file = os.path.join(cache_dir, "{}.json".format(digest))

        if file_exists(cache_file) and LOAD_VIDEO_LIST is True:
            # Reuse the previously scraped video list.
            with open(cache_file, "r") as f:
                name_url = json.load(f)
        else:
            name_url = []
            for url in self.playlist_links():
                info = YouTubeResourceNode(url).get_resource_info()
                name_url.append((info["title"], url))
            with open(cache_file, "w") as f:
                json.dump(name_url, f)
        return name_url
Example #25
0
def save_thumbnail(url, title):
    """Download *url* and save it as a thumbnail file named after *title*.

    Returns the saved file path, or None when the request fails or the
    image is a GIF (implicitly: no return for the GIF case).
    """
    import imghdr  # NOTE: deprecated since Python 3.11
    from io import BytesIO
    try:
        r = requests.get(url)
    except requests.exceptions.RequestException:
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        return None
    else:
        img_buffer = BytesIO(r.content)
        img_ext = imghdr.what(img_buffer)
        if img_ext != "gif":
            filename = "{}.{}".format(title, img_ext)
            base_dir = build_path([DATA_DIR, DATA_DIR_SUBJECT, "thumbnails"])
            filepath = os.path.join(base_dir, filename)
            with open(filepath, "wb") as f:
                f.write(img_buffer.read())
            return filepath
    def write_video(self, base_path, content):
        """Find YouTube links in *content*, download them and record nodes.

        Matches anchor tags whose href contains "youtube"/"youtu.be" or
        whose text is "youtube".  Already-downloaded videos are looked up
        in the global channel_tree and reused.  New unique nodes are
        appended to self.nodes (deduplicated via self.ids).
        """
        videos = content.find_all(
            lambda tag: tag.name == "a" and tag.attrs.get("href", "").find(
                "youtube") != -1 or tag.attrs.get("href", "").find(
                    "youtu.be") != -1 or tag.text.lower() == "youtube")
        VIDEOS_DATA_DIR = build_path([base_path, 'videos'])
        for video in videos:
            youtube = YouTubeResource(video.get("href", ""), lang=self.lang)
            # Reuse a node already present in the channel tree if any.
            node = get_node_from_channel(youtube.resource_url, channel_tree)
            if node is None:
                youtube.to_file(filepath=VIDEOS_DATA_DIR)
                node = youtube.node

            if node is not None:
                # Replace list-item links with the resolved video title.
                if video.parent.name == 'li':
                    video.parent.replace_with("Video name: " + node["title"])
                if node["source_id"] not in self.ids:
                    self.nodes.append(node)
                    self.ids.add(node["source_id"])
Example #27
0
def updateProject(args):
    '''Update an existing project's settings and save it.

    *args* is a mapping with at least ``name`` and ``output`` entries;
    when ``output`` is falsy it defaults to ``<project path>/dist``.
    Returns the status string ``'Update project OK'``.

    (The previous doctest called ``updateProject(title=...)``, which does
    not match this signature, and has been removed.)
    '''
    name = args['name']
    path = os.path.join(project_base_path, name)
    project = Project()
    project.open(path)

    # Normalise the output directory: resolve a user-supplied value
    # against the project path, otherwise default to <path>/dist.
    if args['output']:
        args['output'] = build_path(args['output'], path)
    else:
        args['output'] = os.path.join(path, 'dist')
    project._update(args)
    project.save(path)

    return 'Update project OK'
Example #28
0
    def scrape(self, args, options):
        """Download every lesson's videos and collect the nodes into the tree."""
        if int(options.get('--download-video', "1")) == 0:
            global DOWNLOAD_VIDEOS
            DOWNLOAD_VIDEOS = False

        if int(options.get('--load-video-list', "0")) == 1:
            global LOAD_VIDEO_LIST
            LOAD_VIDEO_LIST = True

        # channel_tree is module-global: other helpers read it.
        global channel_tree
        channel_tree, grades = self.lessons()
        dest = build_path([DATA_DIR])

        for subject in grades:
            for lesson in subject.lessons:
                lesson.download(download=DOWNLOAD_VIDEOS, base_path=dest)
                channel_tree["children"].append(lesson.to_node())

        return channel_tree
 def download(self, base_path):
     """Fetch self.source_id and, if it is a PDF, save it under <base_path>/pdfs.

     Network errors are logged and swallowed (best-effort download).
     """
     PDFS_DATA_DIR = build_path([base_path, 'pdfs'])
     try:
         response = sess.get(self.source_id)
         # NOTE(review): content-type may be absent (None), which would
         # raise TypeError on the `in` test below -- confirm upstream.
         content_type = response.headers.get('content-type')
         if 'application/pdf' in content_type:
             self.filepath = os.path.join(PDFS_DATA_DIR, self.filename)
             # Stream the body to disk in 10 KB chunks.
             with open(self.filepath, 'wb') as f:
                 for chunk in response.iter_content(10000):
                     f.write(chunk)
             LOGGER.info("   - Get file: {}, node name: {}".format(
                 self.filename, self.name))
     except requests.exceptions.HTTPError as e:
         LOGGER.info("Error: {}".format(e))
     except requests.exceptions.ConnectionError:
         ### this is a weird error, may be it's raised when the webpage
         ### is slow to respond requested resources
         # NOTE(review): the message says 5s but the sleep is 3s, and no
         # retry actually happens after the sleep -- confirm intended.
         LOGGER.info(
             "Connection error, the resource will be scraped in 5s...")
         time.sleep(3)
     except requests.exceptions.ReadTimeout as e:
         LOGGER.info("Error: {}".format(e))
     except requests.exceptions.TooManyRedirects as e:
         LOGGER.info("Error: {}".format(e))
Example #30
0
def train(lr,
          w,
          l2_reg,
          epoch,
          batch_size,
          model_type,
          num_layers,
          data_type,
          word2vec,
          num_classes=2):
    """Train an ABCNN model on the WikiQA or MSRP dataset.

    :param lr: learning rate for the Adagrad optimizer.
    :param w: window size passed to the model.
    :param l2_reg: L2 regularisation strength.
    :param epoch: number of training epochs.
    :param batch_size: mini-batch size.
    :param model_type: ABCNN variant identifier.
    :param num_layers: number of convolutional layers.
    :param data_type: "WikiQA" or "MSRP".
    :param word2vec: pretrained word embeddings for the dataset loader.
    :param num_classes: number of output classes (default 2).
    :raises ValueError: for an unknown *data_type* (previously this fell
        through and crashed later with UnboundLocalError).
    """
    if data_type == "WikiQA":
        train_data = WikiQA(word2vec=word2vec)
    elif data_type == "MSRP":
        train_data = MSRP(word2vec=word2vec)
    else:
        raise ValueError(
            "Unknown data_type: {!r} (expected 'WikiQA' or 'MSRP')".format(
                data_type))

    train_data.open_file(mode="train")

    print("=" * 50)
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print("=" * 50)

    model = ABCNN(s=train_data.max_len,
                  w=w,
                  l2_reg=l2_reg,
                  model_type=model_type,
                  num_features=train_data.num_features,
                  num_classes=num_classes,
                  num_layers=num_layers)

    optimizer = tf.train.AdagradOptimizer(lr, name="optimizer").minimize(
        model.cost)

    # Due to GTX 970 memory issues
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)

    init = tf.global_variables_initializer()

    # keep no more than 100 models
    saver = tf.train.Saver(max_to_keep=100)

    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        # NOTE(review): hard-coded Windows log directory -- consider
        # making this configurable.
        train_summary_writer = tf.summary.FileWriter("C:/tf_logs/train",
                                                     sess.graph)

        sess.run(init)

        print("=" * 50)
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")

            train_data.reset_index()
            i = 0

            clf_features = []

            while train_data.is_available():
                i += 1

                batch_x1, batch_x2, batch_y, batch_features = train_data.next_batch(
                    batch_size=batch_size)

                merged, _, c, features = sess.run(
                    [
                        model.merged, optimizer, model.cost,
                        model.output_features
                    ],
                    feed_dict={
                        model.x1: batch_x1,
                        model.x2: batch_x2,
                        model.y: batch_y,
                        model.features: batch_features
                    })

                clf_features.append(features)

                if i % 100 == 0:
                    print("[batch " + str(i) + "] cost:", c)
                train_summary_writer.add_summary(merged, i)

            # Checkpoint once per epoch.
            save_path = saver.save(sess,
                                   build_path("./models/", data_type,
                                              model_type, num_layers),
                                   global_step=e)
            print("model saved as", save_path)

            # NOTE(review): clf_features is concatenated but never used
            # afterwards -- confirm whether it should be saved/returned.
            clf_features = np.concatenate(clf_features)

        print("training finished!")
        print("=" * 50)
        print("=" * 50)