def classifier(contents):
    """Set ``is_content`` on every entry of *contents*.

    A fresh ``content.Content()`` sentinel is inserted at the front of the
    list and another is appended at the back, so each original entry has a
    neighbour on both sides.  The list is modified in place and the two
    sentinels are left in it.  Every original entry then receives
    ``is_content = words_classify(previous, entry, following)``.
    """
    contents.insert(0, content.Content())
    contents.append(content.Content())

    # Index of the trailing sentinel; only the positions strictly between the
    # two sentinels are classified.
    last = len(contents) - 1
    for pos in range(1, last):
        before = contents[pos - 1]
        entry = contents[pos]
        after = contents[pos + 1]
        entry.is_content = words_classify(before, entry, after)
def __connect_db(self):
    """Open a session pool for the chosen database and report the outcome.

    When ``self.text_db_value.get()`` is empty, an error dialog asks the user
    to choose a database and nothing else happens.

    Otherwise the connection details are loaded through
    ``self.__get_db_info(self.db_info)``, a DSN is built with ``cxo.makedsn``
    and a ``cxo.SessionPool`` (min=1, max=500, increment=1, threaded) is
    stored on ``self.pool``.  On success an info dialog is shown,
    ``self.destroy()`` is called and ``ct.Content(self.pool, self.file_name)``
    is constructed.  Any ``Exception`` raised along the way is reported in an
    error dialog together with the formatted traceback.
    """
    if self.text_db_value.get() == '':
        msg.showerror(title='please choose one database',
                      message='please choose one database',
                      parent=self)
        return

    try:
        self.__get_db_info(self.db_info)
        dsn = cxo.makedsn(self.db_info.get("host"),
                          self.db_info.get("port"),
                          self.db_info.get("server_name"))
        self.pool = cxo.SessionPool(self.db_info.get("user_name"),
                                    self.db_info.get("user_password"),
                                    dsn,
                                    min=1,
                                    max=500,
                                    increment=1,
                                    threaded=True)
        # NOTE(review): the title text is misspelled ('succedd'); left
        # untouched because it is a user-facing runtime string.
        msg.showinfo(title='connect succedd',
                     message='connect succeed',
                     parent=self)
        self.destroy()
        # The instance is not kept here; it is constructed only for whatever
        # its constructor does with the pool and the file name.
        ct.Content(self.pool, self.file_name)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate instead of being turned into an error dialog.
        msg.showerror(
            title='connection error',
            message='error when try to connect the pool :\n' +
            traceback.format_exc(),
            parent=self)
def __init__(self):
    """Initialise the base parser and the per-document parsing state."""
    HTMLParser.__init__(self)

    # The three state flags all start switched off.
    self.skip = self.body = self.inline = False
    # href starts unset.
    self.href = None

    # Start with one empty Content that is both the current entry and the
    # first element of the collected list.
    first = content.Content()
    self.content = first
    self.contents = [first]
def fromDict(data):
    """Build an ``Attachment`` from its dictionary form.

    ``data["metadata"]`` is read with ``.get`` for the keys ``filename``,
    ``content_type``, ``content_disposition``, ``content_id`` and ``size``
    (in that positional order) to create an ``AttachmentMetadata``.
    ``data["content"]`` is expanded as keyword arguments to ``cnt.Content``.
    """
    raw_meta = data.get("metadata")
    # Order matters: the values are passed positionally.
    field_names = ("filename", "content_type", "content_disposition",
                   "content_id", "size")
    metadata = AttachmentMetadata(*[raw_meta.get(key) for key in field_names])

    body = cnt.Content(**data.get("content"))
    return Attachment(metadata, body)
def __init__(self, parent, f, yadodata=False):
    """Read a counted list of contents from *f*.

    After the base-class initialisation, one ``f.dword()`` gives the number
    of entries; that many ``content.Content(self, f, 0)`` objects are then
    read in sequence and stored on ``self.contents``.  ``self.data`` starts
    as ``None``.
    """
    base.CWBinaryBase.__init__(self, parent, f, yadodata)

    count = f.dword()
    # Collect into a local list first so ``self.contents`` is only assigned
    # once every entry has been read.
    loaded = []
    for _ in xrange(count):
        loaded.append(content.Content(self, f, 0))
    self.contents = loaded

    self.data = None
def upsert_content(self, content_text, external_id, data_source):
    """Insert or replace one content row and return it as a ``Content``.

    Runs ``INSERT OR REPLACE`` against ``self.content_table_name`` with the
    three values bound as parameters, commits on ``self.db`` and returns
    ``content.Content`` built from the cursor's ``lastrowid`` and the same
    three values.
    """
    cursor = self.__get_cursor()
    statement = ("INSERT OR REPLACE INTO " + self.content_table_name +
                 " (base_text, external_id, data_source) VALUES (?,?,?)")
    values = (content_text, external_id, data_source)
    cursor.execute(statement, values)
    self.db.commit()

    row_id = cursor.lastrowid
    return content.Content(row_id, content_text, external_id, data_source)
def _render_media_tile(card, poster_url, title, release_date, storyline):
    """Return one ``MEDIA_CONTENT`` tile with its placeholders filled in.

    The placeholders are substituted in a fixed order; ``{{json}}`` is filled
    last with ``json.dumps(card.__dict__)``.
    """
    tile = MEDIA_CONTENT
    tile = tile.replace("{{poster}}", poster_url)
    tile = tile.replace("{{title}}", title)
    tile = tile.replace("{{releasedate}}", release_date)
    tile = tile.replace("{{storyline}}", storyline)
    return tile.replace("{{json}}", json.dumps(card.__dict__))


def create_media_card(objs):
    """Single function to inject content into html.

    Every item of *objs* that is a ``movie.Movie`` becomes a movie tile; every
    other item is treated as a TV show.  Movie tiles replace ``{{tiles}}`` in
    ``MAIN_PAGE_CONTENT`` and TV-show tiles replace ``{{tvshowitems}}``.  Each
    tile is preceded by a single space.  Returns the rendered page.

    The page placeholders are filled with ``str.replace`` rather than
    ``re.sub``: ``re.sub`` interprets backslash sequences in the replacement
    text, which corrupts (or rejects) the JSON embedded in the tiles.
    """
    movie_tiles = []
    show_tiles = []

    for item in objs:
        card = content.Content()
        card.title = item.title
        # Newlines are flattened only in the JSON copy of the storyline; the
        # tile itself receives the storyline unchanged.
        card.storyline = item.storyline.replace("\n", " ")
        card.youtubeurl = item.trailer_url

        if isinstance(item, movie.Movie):
            card.date = item.release_date
            # NOTE(review): 'raiting' looks like a misspelling, but it becomes
            # a key of the emitted JSON, so it is kept as is.
            card.raiting = item.rating
            movie_tiles.append(_render_media_tile(
                card, item.poster_url, item.title, item.release_date,
                item.storyline))
        else:
            card.date = item.premiere_date
            show_tiles.append(_render_media_tile(
                card, item.poster_url, item.title, item.premiere_date,
                item.storyline))

    movies_html = "".join(" " + tile for tile in movie_tiles)
    shows_html = "".join(" " + tile for tile in show_tiles)

    rendered_content = MAIN_PAGE_CONTENT.replace("{{tiles}}", movies_html)
    return rendered_content.replace("{{tvshowitems}}", shows_html)
def get_content(self, source, external_id):
    """Look up one content row by external id and data source.

    Returns ``content.Content(*row)`` for the first matching row of
    ``self.content_table_name``, or ``None`` when ``fetchone()`` yields
    nothing.
    """
    cursor = self.__get_cursor()
    query = ("SELECT * FROM " + self.content_table_name +
             " WHERE external_id=? AND data_source=?")
    # Parameter order follows the placeholders: external_id, then data_source.
    cursor.execute(query, (external_id, source))

    row = cursor.fetchone()
    if not row:
        return None
    return content.Content(*row)
def __init__(self, parent, f, yadodata=False):
    """Read contents, ignition numbers and key codes from *f*.

    The reads happen in this order: a dword count followed by that many
    ``content.Content(self, f, 0)`` entries, a dword count followed by that
    many dword ignition values, then ``f.string(True)`` for the key codes.
    ``self.data`` starts as ``None``.
    """
    base.CWBinaryBase.__init__(self, parent, f, yadodata)

    content_count = f.dword()
    loaded = []
    for _ in xrange(content_count):
        loaded.append(content.Content(self, f, 0))
    self.contents = loaded

    ignition_count = f.dword()
    numbers = []
    for _ in xrange(ignition_count):
        numbers.append(f.dword())
    self.ignitions = numbers

    self.keycodes = f.string(True)
    self.data = None
def handle_data(self, data):
    """Add a run of text to the current content entry.

    Nothing happens while ``self.skip`` is set or ``self.body`` is not.
    When ``self.inline`` is off, a new ``content.Content()`` is started (and
    appended to ``self.contents``) if the current one already has text, and
    ``self.inline`` is switched on.  The text is whitespace-normalised
    (runs of whitespace collapsed to single spaces) and passed to
    ``set_data`` together with ``self.href``; a truthy ``self.href`` is also
    passed to ``set_href``.
    """
    if self.skip:
        return
    if not self.body:
        return

    if not self.inline:
        if self.content.text:
            fresh = content.Content()
            self.content = fresh
            self.contents.append(fresh)
        # NOTE(review): nesting reconstructed from a flattened source line;
        # the flag is assumed to be set whether or not a new entry was
        # started — confirm against the original file.
        self.inline = True

    normalised = ' '.join(data.split())
    self.content.set_data(normalised, self.href)
    if self.href:
        self.content.set_href(self.href)
def render(file_path, website_dir):
    """Render one source file to HTML inside *website_dir*.

    The file is parsed with ``content.Content().parse``.  The template named
    ``metadata['template'] + '.html'`` is fetched from ``jinja_env`` and
    rendered with the page's ``html`` and ``metadata``.  The result is written
    to ``website_dir`` joined with ``metadata['path']``, creating missing
    parent directories first.
    """
    page = content.Content()
    page.parse(file_path)

    template_name = page.metadata['template'] + '.html'
    template = jinja_env.get_template(template_name)
    rendered = template.render({
        'html_content': page.html,
        'metadata': page.metadata,
    })

    target = os.path.join(website_dir, page.metadata['path'])
    # The target may live in a subdirectory that does not exist yet.
    target_dir = os.path.dirname(target)
    os.makedirs(target_dir, exist_ok=True)
    with open(target, 'w+') as out:
        out.write(rendered)
def __init__(self):
    """Read ``-u/--url`` from the command line and process that page.

    Without a URL a message is printed and nothing else is done.  Otherwise
    the page content and path are obtained from ``url.Url``, the content is
    fed to ``MyHTMLParser`` configured with the current template, the parsed
    content is wrapped in ``content.Content`` and the output of its
    ``format_data()`` is handed to ``self.save_data`` together with the path.
    """
    self.template = template.Template(template)

    parser = ArgumentParser()
    parser.add_argument("-u", "--url", dest="url")
    options = parser.parse_args()

    if options.url is None:
        # Russian for "You forgot to enter a URL".
        print("Вы забыли ввести URL")
        return

    page = url.Url(options.url)
    data = page.get_content()
    path = page.parse_url()

    current_template = self.template.get_template()
    html_parser = MyHTMLParser(current_template)
    html_parser.feed(data)

    self.content = content.Content(current_template, html_parser.content)
    self.save_data(path, self.content.format_data())
def factory(self, role, config):
    """Create the worker object that matches *role*.

    The three arguments are printed first.  For ``role == "content"`` the
    ``content`` module is imported and ``self.child`` becomes
    ``content.Content(config)``.  For ``role == "master"`` the ``master``
    module is imported, ``self.child`` is set to ``None`` and ``self.master``
    becomes ``master.Master(config)``.  Any other role only produces the
    debug output.
    """
    for value in (self, role, config):
        print(value)

    if role == "content":
        # Imported lazily so the module is loaded only for this role.
        import content
        self.child = content.Content(config)
        print('Content')

    if role == "master":
        import master
        self.child = None
        self.master = master.Master(config)
        print('master')
def fromFileStorage(_file, metadata):
    """Build an ``Attachment`` from an uploaded file object.

    *metadata* must be an ``AttachmentMetadata``; otherwise ``EmailException``
    is raised.  The file is read and passed through
    ``encodeContentIfUnicode``.  ``metadata.content_type`` and
    ``metadata.size`` are updated in place before the ``Attachment`` is
    returned.  A ``mime.MimeError`` raised while working out the content type
    is re-raised as ``EmailException``.
    """
    if not isinstance(metadata, AttachmentMetadata):
        raise EmailException(
            u"metadata must be of type AttachmentMetadata")

    payload = encodeContentIfUnicode(_file.read())

    # flanker falls back to application/octet-stream when no content type is
    # given, and then makes guesses based on the filename.  The same steps are
    # applied here so this object stays consistent with the MIME message that
    # will be generated.
    try:
        main, sub = mime.message.headers.parametrized.fix_content_type(
            _file.content_type, default=(u"application", u"octet-stream"))
        declared = mime.message.ContentType(main, sub)
        adjusted = mime.message.part.adjust_content_type(
            declared, payload, _file.filename)
        metadata.content_type = adjusted.value
    except mime.MimeError as e:
        raise EmailException(
            u"fromFileStorage: flanker exception, value: {}".format(e))

    metadata.size = len(payload)
    return Attachment(metadata, cnt.Content(payload))
def select_content(id):
    """Load one stored content item by *id* from the SQLite database.

    The metadata row and the data row are fetched with
    ``CONTENT_METADATA_SELECT_BY_ID`` and ``CONTENT_DATA_SELECT_BY_ID`` while
    ``_lock`` is held.  Returns a ``content.Content`` whose ``metadata`` is
    ``map_metadata(row)`` and whose ``data`` is the first column of the data
    row.

    Raises:
        Exception: with the message "Content not found" when either row is
            missing.
    """
    # Acquire outside the try block so the lock is released only if it was
    # actually taken.
    _lock.acquire()
    try:
        db = sqlite3.connect(DB_FILENAME)
        # A separate try/finally for the connection: if connect() itself
        # fails there is nothing to close, and the lock is still released.
        try:
            cursor = db.cursor()

            cursor.execute(CONTENT_METADATA_SELECT_BY_ID, (id,))
            metadata = cursor.fetchone()
            if metadata is None:
                raise Exception("Content not found")

            cursor.execute(CONTENT_DATA_SELECT_BY_ID, (id,))
            datrow = cursor.fetchone()
            if datrow is None:
                raise Exception("Content not found")
            dat = datrow[0]
        finally:
            db.close()
    finally:
        _lock.release()

    ret = content.Content()
    ret.metadata = map_metadata(metadata)
    ret.data = dat
    return ret
# Echo the run settings and their types.
print('MODEL_NAME: {}'.format(MODEL_NAME))
print(type(MODEL_NAME))
print('SHOULD_REBUILD: {}'.format(should_rebuild))
print(type(should_rebuild))

# #########################
# Model stores
dictionary = corpora.Dictionary()
lemma = WordNetLemmatizer()

# Open the database and build the content object used for modeling and
# analysis.
# (Earlier variant: DATABASE = os.path.abspath(DATABASE_FILE))
DATABASE = utils.get_file_path(DATABASE_FILE)
# NOTE(review): this rebinds the name ``content`` from the module to the
# instance; kept because later code may rely on it.
content = content.Content(DATABASE)

# Dictionary: the vector space of words and their counts.
dictionary = utils.build_dictionary(content, should_rebuild, DICT_BACKUP)

# Corpus built on top of that dictionary.
corpus = utils.build_corpus(dictionary, content, should_rebuild,
                            CORPUS_BACKUP)
print('Corpus Size: {}'.format(len(corpus)))

# Configuration for modeling
model_config = {
    'RANDOM_STATE': RANDOM_STATE,
    'NUM_TOPICS': NUM_TOPICS,
    'PASSES': NUM_PASSES,
    'MODEL_NAME': MODEL_NAME,
}