def finished(self, db):
    """Turn every unparsed ESLPod crawl record into a LanguageResource.

    Looks up the ``eslpod`` LanguageChannel and returns early (after printing
    a message) when it does not exist.  For each ``EslPod`` row with
    ``is_parsed == 0``, ordered by ``publish_time``:

    * find the LanguageResource with the same channel and ``resource_url``,
      or create an Entity plus a LanguageResource sharing its id;
    * copy the cleaned title, description, url and publish time onto the
      resource and commit it;
    * when the pod has a media URL and the local file
      ``<options.local_media_path>/static/english/eslpod/eslpod_<id>.mp3``
      is missing, download the media to ``/tmp/eslpod`` and move it into
      place.  If ``pod.fast_dialog`` is set, ``mp3splt`` first cuts the file
      so that it starts at that time and only the cut part is moved.

    :param db: unused here; kept so the signature stays unchanged.
    :returns: None.
    :raises IOError: propagated from the download when retrieval fails.

    NOTE(review): the source this was reviewed from had lost its line
    breaks and indentation; the nesting below (in particular where the
    ``if r is None`` branch ends) was reconstructed and should be confirmed.
    """
    # Function-scope imports so this block does not depend on the module's
    # import list being extended.
    import shutil
    import subprocess

    pods = EslPod.query.filter_by(is_parsed=0).order_by('publish_time').all()
    c = LanguageChannel.query.filter_by(name='eslpod').first()
    if c is None:
        print("eslpod channel is None")
        return

    tmp_dir = "/tmp/eslpod"
    for pod in pods:
        r = LanguageResource.query.filter_by(
            channel_id=c.id, resource_url=pod.url).first()
        if r is None:
            # The Entity is committed first so that its generated id can be
            # reused as the resource id.
            entity = Entity()
            entity.title = pod.title
            entity.isa = "english"
            to_db.session.add(entity)
            to_db.session.commit()
            r = LanguageResource()
            r.id = entity.id
            r.channel_id = c.id
            r.study_count = 0
            r.dl_count = randint(20, 40)

        # Drop dash-like characters before eslpod_title strips its pattern.
        title = pod.title
        for unwanted in (u'–', u'-', u'\x92', u'\x96'):
            title = title.replace(unwanted, "")
        r.title = eslpod_title.sub("", title).strip()
        print(r.title)
        r.description = pod.content
        r.resource_url = pod.url
        r.publish_time = pod.publish_time
        to_db.session.add(r)
        to_db.session.commit()

        if not pod.media:
            continue

        media_file = pod.media.split('/')[-1]
        normal_media_file = "eslpod_%d.mp3" % r.id
        dest_media_file = (options.local_media_path
                           + "/static/english/eslpod/%s" % normal_media_file)
        dest_tmp_file = "%s/%s" % (tmp_dir, media_file)
        if os.path.exists(dest_media_file):
            # Already published on an earlier run.
            continue

        # The scratch directory is not guaranteed to exist on a fresh host.
        if not os.path.isdir(tmp_dir):
            os.makedirs(tmp_dir)
        print("download pod media: %s %s" % (pod.media, dest_media_file))
        # Download errors (IOError) propagate to the caller unchanged.
        urllib.FancyURLopener().retrieve(pod.media, dest_tmp_file)
        # NOTE(review): this assignment happens after the last commit in this
        # function; presumably it is flushed by a later commit on the same
        # session -- confirm.
        r.original_media = pod.media
        if not os.path.exists(dest_tmp_file):
            print("%s not exists" % dest_tmp_file)
            continue

        split_media_file = "split_" + media_file.lower()
        dest_split_media_file = "%s/%s" % (tmp_dir, split_media_file)

        # Start one second before the announced fast-dialog time.  Working in
        # total seconds lets "1:00" become "0:59" instead of staying "1:00".
        fast_dialog = pod.fast_dialog
        found = re.search(r"(\d+):(\d+)", fast_dialog) if fast_dialog else None
        if found:
            total = int(found.group(1)) * 60 + int(found.group(2)) - 1
            fast_dialog = "%d:%d" % divmod(max(total, 0), 60)
            print(fast_dialog)
        else:
            # A non-empty value without "m:s" is still handed to mp3splt as-is.
            print("not find fast_dialog %s" % (pod.fast_dialog,))

        if fast_dialog:
            start_time = fast_dialog.replace(':', '.')
            # Argument list, no shell: the file name comes from a crawled URL
            # and must not be interpreted by a shell.
            split_cmd = ["mp3splt", "-o", split_media_file.split('.')[0],
                         dest_tmp_file, start_time, "EOF-0.30"]
            print(" ".join(split_cmd))
            try:
                status = subprocess.call(split_cmd)
            except OSError as e:
                # Raised when the mp3splt executable cannot be started.
                print(e)
                status = -1
            if status != 0:
                print("mp3splt failed with status %s" % status)
            move_source = dest_split_media_file
        else:
            move_source = dest_tmp_file

        print("mv %s %s" % (move_source, dest_media_file))
        try:
            shutil.move(move_source, dest_media_file)
        except EnvironmentError as e:
            # Best effort, as before: report the failure and go on to the
            # next pod.
            print(e)
def finished(self, db):
    """Turn every unparsed VOA crawl record into a LanguageResource.

    Looks up the ``voa special`` LanguageChannel and returns early (after
    printing a message) when it does not exist.  For each ``Voa`` row with
    ``is_parsed == 0``, ordered by ``publish_time``:

    * find the LanguageResource with the same channel and ``resource_url``,
      or create an Entity plus a LanguageResource sharing its id;
    * when the pod has a media URL, download it to ``/tmp/voa`` and move it
      to ``<options.local_media_path>/static/english/voa/voa_<id>.mp3``
      unless that file already exists; ``slow_media`` ends up as the local
      static URL with an ``md5`` query parameter when the local file exists,
      otherwise as the remote media URL;
    * copy title, description, url, category and publish time onto the
      resource and commit it;
    * store the resource id on the pod, set ``is_parsed`` to 200 and commit
      the pod through ``crawldb``.

    :param db: unused here; kept so the signature stays unchanged.
    :returns: None.
    :raises IOError: propagated from the download when retrieval fails.

    NOTE(review): the source this was reviewed from had lost its line
    breaks and indentation; the nesting below was reconstructed and should
    be confirmed.
    """
    # Function-scope import so this block does not depend on the module's
    # import list being extended.
    import shutil

    pods = Voa.query.filter_by(is_parsed=0).order_by('publish_time').all()
    c = LanguageChannel.query.filter_by(name='voa special').first()
    if c is None:
        print("voa channel is None")
        return

    tmp_dir = "/tmp/voa"
    for pod in pods:
        r = LanguageResource.query.filter_by(
            channel_id=c.id, resource_url=pod.url).first()
        if r is None:
            # The Entity is committed first so that its generated id can be
            # reused as the resource id.
            entity = Entity()
            entity.title = pod.title
            entity.isa = "english"
            to_db.session.add(entity)
            to_db.session.commit()
            r = LanguageResource()
            r.id = entity.id
            r.channel_id = c.id
            r.study_count = 0
            r.dl_count = randint(20, 40)

        if pod.media:
            # Remote URL first; replaced below once a local copy exists.
            r.slow_media = pod.media
            media_file = "voa_%d.mp3" % r.id
            dest_media_file = (options.local_media_path
                               + "/static/english/voa/%s" % media_file)
            dest_tmp_file = "%s/%s" % (tmp_dir, media_file)

            if not os.path.exists(dest_media_file):
                # The scratch directory is not guaranteed to exist on a
                # fresh host.
                if not os.path.isdir(tmp_dir):
                    os.makedirs(tmp_dir)
                print("download pod media: %s, %s" % (pod.media, dest_tmp_file))
                # Download errors (IOError) propagate to the caller unchanged.
                urllib.FancyURLopener().retrieve(pod.media, dest_tmp_file)
                r.original_media = pod.media
                if not os.path.exists(dest_tmp_file):
                    print("%s not exists" % dest_tmp_file)
                else:
                    print("mv %s %s" % (dest_tmp_file, dest_media_file))
                    try:
                        # shutil.move instead of a shell "mv": no quoting
                        # problems, and failures surface as exceptions.
                        shutil.move(dest_tmp_file, dest_media_file)
                    except EnvironmentError as e:
                        # Best effort, as before: report and keep going.
                        print(e)

            if os.path.exists(dest_media_file):
                print(dest_media_file)
                # The md5 query parameter changes whenever the file changes.
                media_md5 = file_md5(dest_media_file)
                r.slow_media = "/static/english/voa/%s?md5=%s" % (
                    media_file, media_md5)

        r.title = pod.title
        r.description = pod.content
        r.resource_url = pod.url
        r.category = pod.category
        r.publish_time = pod.publish_time
        to_db.session.add(r)
        to_db.session.commit()

        # Mark the crawl record as handled so later runs skip it.
        pod.resource_id = r.id
        pod.is_parsed = 200
        crawldb.session.add(pod)
        crawldb.session.commit()