Ejemplo n.º 1
0
def classifier(contents):
    """Classify every entry of ``contents`` using its immediate neighbours.

    A fresh ``content.Content()`` is inserted at the front and appended at
    the back of ``contents`` (the list is modified in place and both extra
    entries stay in it), so each original entry has a predecessor and a
    successor.  Each original entry then gets its ``is_content`` attribute
    set to ``words_classify(previous, current, following)``.
    """
    contents.insert(0, content.Content())
    contents.append(content.Content())
    # Visit the interior only: the first and last positions hold the
    # padding entries added above.
    for pos in range(1, len(contents) - 1):
        entry = contents[pos]
        entry.is_content = words_classify(
            contents[pos - 1], entry, contents[pos + 1])
Ejemplo n.º 2
0
 def __connect_db(self):
     """Create a session pool for the chosen database and open the content view.

     If ``self.text_db_value`` is empty an error dialog is shown and nothing
     else happens.  Otherwise the connection settings are loaded through
     ``self.__get_db_info(self.db_info)``, a DSN is built with
     ``cxo.makedsn`` and a threaded ``cxo.SessionPool`` (1 to 500 sessions,
     growing one at a time) is stored on ``self.pool``.  On success a
     confirmation dialog is shown, ``self.destroy()`` is called and
     ``ct.Content(self.pool, self.file_name)`` is constructed.

     Any ``Exception`` raised along the way is reported in an error dialog
     together with its traceback; it is not re-raised.
     """
     if self.text_db_value.get() == '':
         msg.showerror(title='please choose one database',
                       message='please choose one database',
                       parent=self)
         return

     try:
         self.__get_db_info(self.db_info)
         dsn = cxo.makedsn(self.db_info.get("host"),
                           self.db_info.get("port"),
                           self.db_info.get("server_name"))
         self.pool = cxo.SessionPool(self.db_info.get("user_name"),
                                     self.db_info.get("user_password"),
                                     dsn,
                                     min=1,
                                     max=500,
                                     increment=1,
                                     threaded=True)
         msg.showinfo(title='connect succedd',
                      message='connect succeed',
                      parent=self)
         self.destroy()
         # Constructed for its side effects only; the instance is not kept.
         ct.Content(self.pool, self.file_name)
     except Exception:
         # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
         # SystemExit are not swallowed and shown as a connection error.
         msg.showerror(
             title='connection error',
             message='error when try to connect the pool :\n' +
             traceback.format_exc(),
             parent=self)
Ejemplo n.º 3
0
 def __init__(self):
     """Set up the parser state and start with a single empty content item."""
     HTMLParser.__init__(self)
     # State flags, all initially off.
     self.skip = self.body = self.inline = False
     self.href = None
     # ``self.content`` is the item currently being filled; ``self.contents``
     # collects every item created, starting with this first one.
     first = content.Content()
     self.content = first
     self.contents = [first]
Ejemplo n.º 4
0
 def fromDict(data):
     """Build an ``Attachment`` from its dictionary representation.

     ``data["metadata"]`` supplies the ``filename``, ``content_type``,
     ``content_disposition``, ``content_id`` and ``size`` values (a missing
     key yields ``None``) that are passed positionally, in that order, to
     ``AttachmentMetadata``.  ``data["content"]`` is expanded as keyword
     arguments to ``cnt.Content``.
     """
     raw_meta = data.get("metadata")
     field_order = ("filename", "content_type", "content_disposition",
                    "content_id", "size")
     metadata = AttachmentMetadata(*[raw_meta.get(key) for key in field_order])
     content_kwargs = data.get("content")
     return Attachment(metadata, cnt.Content(**content_kwargs))
Ejemplo n.º 5
0
    def __init__(self, parent, f, yadodata=False):
        """Initialise the base class and read the content list from ``f``.

        A dword count is read from ``f`` first, followed by that many
        ``content.Content(self, f, 0)`` objects, stored in ``self.contents``.
        ``self.data`` starts out as ``None``.
        """
        base.CWBinaryBase.__init__(self, parent, f, yadodata)
        how_many = f.dword()
        # Build the list locally and publish it only once it is complete.
        loaded = []
        for _ in xrange(how_many):
            loaded.append(content.Content(self, f, 0))
        self.contents = loaded

        self.data = None
Ejemplo n.º 6
0
 def upsert_content(self, content_text, external_id, data_source):
     """Insert or replace one content row and return it as a ``Content``.

     The row is written to ``self.content_table_name`` with an
     ``INSERT OR REPLACE`` statement (values are bound as parameters), the
     transaction is committed on ``self.db``, and the returned
     ``content.Content`` carries the cursor's ``lastrowid`` followed by the
     three values that were stored.
     """
     cursor = self.__get_cursor()
     statement = ("INSERT OR REPLACE INTO " + self.content_table_name +
                  " (base_text, external_id, data_source) VALUES (?,?,?)")
     row_values = (content_text, external_id, data_source)
     cursor.execute(statement, row_values)
     self.db.commit()
     return content.Content(cursor.lastrowid, *row_values)
Ejemplo n.º 7
0
def _render_media_tile(item, date, extra_json=()):
    """Return MEDIA_CONTENT filled in for one movie or TV show.

    ``date`` is the release/premiere date text for ``item``.  ``extra_json``
    is a sequence of ``(attribute, value)`` pairs added to the JSON payload
    after the common ``title``/``date``/``storyline``/``youtubeurl`` fields.
    """
    payload = content.Content()
    payload.title = item.title
    payload.date = date
    # The JSON copy of the storyline is flattened onto a single line.
    payload.storyline = item.storyline.replace("\n", " ")
    payload.youtubeurl = item.trailer_url
    for attr, value in extra_json:
        setattr(payload, attr, value)

    tile = MEDIA_CONTENT
    tile = tile.replace("{{poster}}", item.poster_url)
    tile = tile.replace("{{title}}", item.title)
    tile = tile.replace("{{releasedate}}", date)
    tile = tile.replace("{{storyline}}", item.storyline)
    tile = tile.replace("{{json}}", json.dumps(payload.__dict__))
    return tile


def create_media_card(objs):
    """Single function to inject content into html.

    ``objs`` is split into ``movie.Movie`` instances and everything else
    (treated as TV shows).  One MEDIA_CONTENT tile is rendered per item;
    the movie tiles replace ``{{tiles}}`` and the TV show tiles replace
    ``{{tvshowitems}}`` in MAIN_PAGE_CONTENT.  Returns the resulting page
    text.
    """
    movies = []
    tvshows = []
    for item in objs:
        if isinstance(item, movie.Movie):
            movies.append(item)
        else:
            tvshows.append(item)

    # NOTE(review): the JSON key "raiting" is misspelled but kept as-is,
    # since whatever consumes the JSON presumably reads that exact key.
    movie_tiles = "".join(
        " " + _render_media_tile(mov, mov.release_date,
                                 (("raiting", mov.rating),))
        for mov in movies)
    show_tiles = "".join(
        " " + _render_media_tile(show, show.premiere_date)
        for show in tvshows)

    # Use literal str.replace rather than re.sub: the tiles embed
    # json.dumps output, whose backslash escapes would otherwise be
    # interpreted (or rejected) as regex replacement-template escapes.
    rendered_content = MAIN_PAGE_CONTENT.replace("{{tiles}}", movie_tiles)
    return rendered_content.replace("{{tvshowitems}}", show_tiles)
Ejemplo n.º 8
0
 def get_content(self, source, external_id):
     """Look up one content row by data source and external id.

     Returns ``content.Content(*row)`` built from the first matching row of
     ``self.content_table_name``, or ``None`` when no row matches.
     """
     cursor = self.__get_cursor()
     query = ("SELECT * FROM " + self.content_table_name +
              " WHERE external_id=? AND data_source=?")
     cursor.execute(query, (external_id, source))
     row = cursor.fetchone()
     if not row:
         return None
     return content.Content(*row)
Ejemplo n.º 9
0
    def __init__(self, parent, f, yadodata=False):
        """Initialise the base class and read contents, ignitions and keycodes.

        Read order from ``f``: a dword count followed by that many
        ``content.Content(self, f, 0)`` objects, a second dword count
        followed by that many dword values (``self.ignitions``), then
        ``f.string(True)`` (``self.keycodes``).  ``self.data`` starts out as
        ``None``.
        """
        base.CWBinaryBase.__init__(self, parent, f, yadodata)

        content_total = f.dword()
        loaded = []
        for _ in xrange(content_total):
            loaded.append(content.Content(self, f, 0))
        self.contents = loaded

        ignition_total = f.dword()
        ignitions = []
        for _ in xrange(ignition_total):
            ignitions.append(f.dword())
        self.ignitions = ignitions

        self.keycodes = f.string(True)

        self.data = None
Ejemplo n.º 10
0
    def handle_data(self, data):
        """Add a run of text to the current content item.

        Nothing happens while ``self.skip`` is set or ``self.body`` is not.
        When not in inline mode and the current item already has text, a new
        ``content.Content()`` becomes the current item, is appended to
        ``self.contents`` and inline mode is switched on.  The text is then
        stored with its whitespace runs collapsed to single spaces, along
        with the current ``self.href`` (which is also passed to
        ``set_href`` when it is truthy).
        """
        if self.skip:
            return
        if not self.body:
            return

        if not self.inline and self.content.text:
            fresh = content.Content()
            self.content = fresh
            self.contents.append(fresh)
            self.inline = True

        collapsed = ' '.join(data.split())
        self.content.set_data(collapsed, self.href)

        if self.href:
            self.content.set_href(self.href)
Ejemplo n.º 11
0
def render(file_path, website_dir):
    """Render one content file through its template into ``website_dir``.

    The file at ``file_path`` is parsed with ``content.Content().parse``.
    The template named ``<metadata['template']>.html`` is fetched from
    ``jinja_env`` and rendered with ``html_content`` (the page's ``html``)
    and ``metadata``.  The result is written to
    ``website_dir/<metadata['path']>``, creating any missing parent
    directories first.
    """
    page = content.Content()
    page.parse(file_path)

    template = jinja_env.get_template(page.metadata['template'] + '.html')
    output = template.render({
        'html_content': page.html,
        'metadata': page.metadata,
    })

    output_path = os.path.join(website_dir, page.metadata['path'])
    # Make sure the subdirectories exist before writing to the file.
    # os.makedirs('') raises FileNotFoundError, so skip it when the output
    # path has no directory component at all.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, 'w+') as f:
        f.write(output)
Ejemplo n.º 12
0
 def __init__(self):
     """Fetch the page given by ``-u/--url``, format it and save the result.

     ``self.template`` is created first.  If no URL was supplied on the
     command line a message is printed and nothing else is done.
     Otherwise the page content is downloaded, fed through
     ``MyHTMLParser`` using the current template, wrapped in
     ``content.Content`` (stored on ``self.content``), formatted, and
     passed to ``self.save_data`` together with the path derived from the
     URL.
     """
     self.template = template.Template(template)

     cli = ArgumentParser()
     cli.add_argument("-u", "--url", dest="url")
     requested_url = cli.parse_args().url
     if requested_url is None:
         print("Вы забыли ввести URL")
         return

     page = url.Url(requested_url)
     data = page.get_content()
     path = page.parse_url()
     current_template = self.template.get_template()

     html_parser = MyHTMLParser(current_template)
     html_parser.feed(data)

     self.content = content.Content(current_template, html_parser.content)
     self.save_data(path, self.content.format_data())
Ejemplo n.º 13
0
 def factory(self, role, config):
     """Create the object matching ``role`` and attach it to ``self``.

     ``self``, ``role`` and ``config`` are printed first.  For the role
     ``"content"`` a ``content.Content(config)`` is stored as
     ``self.child``; for ``"master"`` ``self.child`` is set to ``None`` and
     a ``master.Master(config)`` is stored as ``self.master``.  Any other
     role leaves ``self`` untouched.  The modules are imported lazily, only
     for the role that needs them.
     """
     for value in (self, role, config):
         print(value)

     # A ``self.checkForMaster(role, config)`` pre-check is disabled here.
     if role == "content":
         import content as content_module
         self.child = content_module.Content(config)
         print('Content')

     # A "transcode" role branch (transcode.Transcode) is disabled here.
     if role == "master":
         import master as master_module
         self.child = None
         self.master = master_module.Master(config)
         print('master')
Ejemplo n.º 14
0
    def fromFileStorage(_file, metadata):
        """Build an ``Attachment`` from an uploaded file object.

        ``_file`` is read in full (it must also expose ``content_type`` and
        ``filename``); the bytes are passed through
        ``encodeContentIfUnicode``.  ``metadata.content_type`` and
        ``metadata.size`` are overwritten with the adjusted MIME type and
        the payload length.

        Raises ``EmailException`` when ``metadata`` is not an
        ``AttachmentMetadata`` or when flanker raises ``mime.MimeError``.
        """
        if not isinstance(metadata, AttachmentMetadata):
            raise EmailException(
                u"metadata must be of type AttachmentMetadata")

        payload = encodeContentIfUnicode(_file.read())

        # flanker falls back to application/octet-stream when no content
        # type is given, and for octet-stream it additionally guesses from
        # the filename.  Apply the same adjustments here so this object
        # agrees with the MIME message that will be generated later.
        fallback_type = (u"application", u"octet-stream")
        try:
            major, minor = mime.message.headers.parametrized.fix_content_type(
                _file.content_type, default=fallback_type)
            declared = mime.message.ContentType(major, minor)
            adjusted = mime.message.part.adjust_content_type(
                declared, payload, _file.filename)
            metadata.content_type = adjusted.value
        except mime.MimeError as e:
            raise EmailException(
                u"fromFileStorage: flanker exception, value: {}".format(e))

        metadata.size = len(payload)
        return Attachment(metadata, cnt.Content(payload))
Ejemplo n.º 15
0
def select_content(id):
	"""Load one content item (metadata and data) by its id.

	Both queries run on a fresh connection to DB_FILENAME while holding
	the module-level ``_lock``.  Returns a ``content.Content`` whose
	``metadata`` is ``map_metadata(row)`` and whose ``data`` is the first
	column of the data row.

	Raises ``Exception("Content not found")`` when either the metadata row
	or the data row is missing.
	"""
	_lock.acquire()
	try:
		# Connect only after the lock is held, and close in an inner
		# ``finally`` so a failing connect() can neither hit an unbound
		# ``db`` nor skip the lock release below.
		db = sqlite3.connect(DB_FILENAME)
		try:
			cursor = db.cursor()
			cursor.execute(CONTENT_METADATA_SELECT_BY_ID, (id,))
			metadata = cursor.fetchone()
			if metadata is None:
				raise Exception("Content not found")
			cursor.execute(CONTENT_DATA_SELECT_BY_ID, (id,))
			datrow = cursor.fetchone()
			if datrow is None:
				raise Exception("Content not found")
			dat = datrow[0]
		finally:
			db.close()
	finally:
		_lock.release()
	ret = content.Content()
	ret.metadata = map_metadata(metadata)
	ret.data = dat
	return ret
Ejemplo n.º 16
0
# Echo the run parameters and their types.
print('MODEL_NAME: {}'.format(MODEL_NAME))
print(type(MODEL_NAME))
print('SHOULD_REBUILD: {}'.format(should_rebuild))
print(type(should_rebuild))

# ---------------------------------------------------------------------------
# Model stores
# ---------------------------------------------------------------------------
dictionary = corpora.Dictionary()
lemma = WordNetLemmatizer()

# Resolve the database file and wrap it in a Content object used for
# modeling and analysis.
# NOTE: this rebinds the name ``content`` from the module to the instance.
DATABASE = utils.get_file_path(DATABASE_FILE)
content = content.Content(DATABASE)

# Dictionary: vector space of words and word counts.
dictionary = utils.build_dictionary(content, should_rebuild, DICT_BACKUP)

# Corpus built from the dictionary and the content.
corpus = utils.build_corpus(dictionary, content, should_rebuild, CORPUS_BACKUP)
print('Corpus Size: {}'.format(len(corpus)))

# Settings handed to the modeling step.
model_config = {
    'RANDOM_STATE': RANDOM_STATE,
    'NUM_TOPICS': NUM_TOPICS,
    'PASSES': NUM_PASSES,
    'MODEL_NAME': MODEL_NAME,
}