Beispiel #1
0
def generate_population(views=10, pop_size=10, verbose=0):
    '''
    Initialise the population with unique, randomly generated individuals.

    Each individual is the flat list ``view_code + fusion_code``:
    ``view_code`` is a random ordering of between 2 and ``views`` distinct
    view indices, and ``fusion_code`` holds ``len(view_code) - 1`` indices
    into the configured ``fusion_ways``, drawn with replacement.
    Uniqueness is decided on the ``utils.list2str`` form of the individual.

    :param views: number of views to choose from; must be at least 2
    :param pop_size: number of unique individuals to generate
    :param verbose: when 1, print every sampled candidate
    :return: list of ``pop_size`` individuals (each a list of ints)
    :raises ValueError: if ``views`` is below 2, or if ``pop_size`` is
        larger than the number of distinct individuals that can be encoded
    '''
    fusion_ways = config.get_configs()['fusion_ways']
    population = []
    if pop_size <= 0:
        # Nothing to generate; matches the old loop, which never ran here.
        return population
    if views < 2:
        raise ValueError(
            'views must be at least 2 (view_code needs two elements), '
            'got %r' % (views,))

    # Upper bound on the number of distinct individuals:
    #   sum over k = 2..views of (ordered picks of k views) * fusion^(k-1).
    # Without this check an over-large pop_size makes the rejection loop
    # below spin forever, because no new unique individual can ever appear.
    n_fusion = len(fusion_ways)
    capacity = 0
    orderings = views
    for k in range(2, views + 1):
        orderings *= views - k + 1
        capacity += orderings * n_fusion ** (k - 1)
        if capacity >= pop_size:
            break
    else:
        raise ValueError(
            'pop_size=%r exceeds the %d distinct individuals possible with '
            'views=%r and %d fusion ways' % (pop_size, capacity, views,
                                             n_fusion))

    seen = set()
    while len(population) < pop_size:
        # view_code at least contains two elements
        view_code = random.sample(range(0, views), k=random.randint(2, views))
        fusion_code = random.choices(range(0, n_fusion), k=len(view_code) - 1)
        pop = view_code + fusion_code
        if verbose == 1:
            print(f'view_code:{view_code}')
            print(f'fusion_code:{fusion_code}')
            print(f'pop:{pop}')
            print('='*30)
        # Serialise once and reuse the key for both the test and the insert.
        key = utils.list2str(pop)
        if key not in seen:
            seen.add(key)
            population.append(pop)
    return population
Beispiel #2
0
def print_block_table(block_codepoints):
    """
    Print one table row per block of bits.

    Each row shows the block rendered by ``list2str``, the hex and decimal
    value returned by ``bits2codepoint`` and the ``repr`` of the matching
    character.

    :param block_codepoints: iterable of bit blocks accepted by
        ``bits2codepoint`` and ``list2str``
    """
    for block in block_codepoints:
        byte_no = bits2codepoint(block)
        character = chr(byte_no)
        # print(...) with one pre-formatted string behaves the same on
        # Python 2 and also parses on Python 3, unlike the print statement.
        print("%s %4s %3s %s" % (
            list2str(block), hex(byte_no), byte_no, repr(character)
        ))
Beispiel #3
0
def selection(P_t, Q_t):
    """
    Build the next parent population by binary tournament plus elitism.

    ``len(P_t)`` individuals are drawn from ``P_t + Q_t``; each draw picks
    two distinct pool positions at random and keeps the one with the higher
    score in the table returned by ``utils.load_result()``. That table is
    keyed by the '-'-joined genes of an individual. Afterwards, if the
    best-scoring individual of the whole table is missing from the result,
    it overwrites one slot (see the note in the body).

    :param P_t: current parent population (list of individuals, each a
        list of ints)
    :param Q_t: offspring population, same layout as ``P_t``
    :return: new population with ``len(P_t)`` individuals; ``[]`` when
        ``P_t`` is empty
    """
    shared_code_acc = utils.load_result()

    def select_p1(select_pool):
        # Binary tournament: the first pick wins only on a strictly higher
        # score, otherwise the second pick is returned.
        first, second = random.sample(range(len(select_pool)), 2)
        key_first = '-'.join([str(g) for g in select_pool[first]])
        key_second = '-'.join([str(g) for g in select_pool[second]])
        if shared_code_acc[key_first] > shared_code_acc[key_second]:
            return select_pool[first]
        return select_pool[second]

    Pt_Qt = P_t + Q_t
    P_t1 = [select_p1(Pt_Qt) for _ in range(len(P_t))]
    if not P_t1:
        # Nothing was selected, so there is no slot for the elite individual
        # (the old code raised IndexError on P_t1[0] here).
        return P_t1

    # Elitism: if the best individual is not in P_t1, replace the worst
    # one with it. The extremes are computed once instead of once per item.
    best_acc = max(shared_code_acc.values())
    worst_acc = min(shared_code_acc.values())
    for code_str, acc in shared_code_acc.items():
        # On ties the last matching key wins, as before.
        if acc == best_acc:
            max_code_str = code_str
        if acc == worst_acc:
            min_code_str = code_str
    max_code = [int(g) for g in max_code_str.strip().split('-')]

    selected_strs = [utils.list2str(ind) for ind in P_t1]
    if max_code_str not in selected_strs:
        # NOTE(review): when the globally worst individual is not part of
        # P_t1 the first slot is overwritten instead - confirm that this
        # fallback is intended rather than "worst within P_t1".
        if min_code_str in selected_strs:
            min_i = selected_strs.index(min_code_str)
        else:
            min_i = 0
        P_t1[min_i] = max_code
    return P_t1
Beispiel #4
0
def save_profile(request):
    """
    Handle a profile form submission, then redirect to the profile index.

    On POST every submitted value is passed through ``list2str``, the
    'csrfmiddlewaretoken' and 'button' entries are dropped and the remaining
    mapping is printed. Any request method ends with a redirect to the URL
    reversed from 'profile:index'.
    """
    if request.method == 'POST':
        form = {}
        for field, raw_value in request.POST.items():
            form[field] = list2str(raw_value)
        # Entries that belong to the HTML form itself, not to the profile.
        for housekeeping in ('csrfmiddlewaretoken', 'button'):
            form.pop(housekeeping, None)
        #tours_user.objects.create(**form)
        print(form)

    target = reverse('profile:index')
    return redirect(target)
Beispiel #5
0
    def super_key(self, table: str) -> list:
        """
        Return every subset of the table's fields that ``is_key`` accepts.

        :param table: name of a table present in ``self.tables``
        :return: the subsets produced by ``utils.get_all_subset`` for which
            ``self.is_key(table, utils.list2str(subset))`` is truthy, in
            the order they were produced
        :raises UnknownTableError: if ``table`` is not in ``self.tables``
        """
        # The table must exist
        if table not in self.tables:
            raise UnknownTableError()

        fields = self.get_fields(table)
        keys = []
        for candidate in utils.get_all_subset(fields):
            if self.is_key(table, utils.list2str(candidate)):
                keys.append(candidate)
        return keys
Beispiel #6
0
    def sync_bitstream(self, bitstream):
        """
        Sync the bitstream and search it for the lead-in byte pattern.

        Calls ``bitstream.sync(32)``, then looks for the bit pattern of
        ``self.cfg.LEAD_BYTE_CODEPOINT`` with ``find_iter_window``, limited
        to ``self.cfg.LEAD_BYTE_LEN * 8`` positions. Hitting that limit is
        logged as an error and not re-raised.

        :param bitstream: bit source providing ``sync()`` and
            ``pformat_pos()``
        """
        log.debug("start sync bitstream at wave pos: %s" % bitstream.pformat_pos())
        bitstream.sync(32) # Sync bitstream to wave sinus cycle

        # test_bitstream = list(itertools.islice(bitstream, 258 * 8))
        # print_bitlist(test_bitstream)

        log.debug("Searching for lead-in byte at wave pos: %s" % bitstream.pformat_pos())

        # Searching for lead-in byte
        lead_in_pattern = list(codepoints2bitstream(self.cfg.LEAD_BYTE_CODEPOINT))
        max_pos = self.cfg.LEAD_BYTE_LEN * 8
        try:
            leader_pos = find_iter_window(bitstream, lead_in_pattern, max_pos)
        # "except X as err" works on Python 2.6+ and 3; "except X, err" is
        # Python 2-only syntax.
        except MaxPosArraived as err:
            log.error("Error: Leader-Byte '%s' (%s) not found in the first %i Bytes! (%s)" % (
                list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                self.cfg.LEAD_BYTE_LEN, err
            ))
Beispiel #7
0
class BitstreamHandler(BitstreamHandlerBase):
    """
    feed with wave bitstream
    """
    def get_block_info(self, bitstream):
        """
        Convert the raw bitstream to codepoints and delegate to the base
        class implementation of ``get_block_info``.

        :param bitstream: raw bit source accepted by ``bitstream2codepoints``
        :return: whatever the base class ``get_block_info`` returns
        """
        # convert the raw bitstream to codepoint stream
        codepoint_stream = bitstream2codepoints(bitstream)

        return super(BitstreamHandler, self).get_block_info(codepoint_stream)

    def sync_bitstream(self, bitstream):
        """
        Sync the bitstream and search it for the lead-in byte pattern.

        Calls ``bitstream.sync(32)``, then looks for the bit pattern of
        ``self.cfg.LEAD_BYTE_CODEPOINT`` with ``find_iter_window``, limited
        to ``self.cfg.LEAD_BYTE_LEN * 8`` positions. Both search failures
        (limit reached, pattern absent) are logged as errors and not
        re-raised.

        :param bitstream: bit source providing ``sync()`` and
            ``pformat_pos()``
        """
        log.debug("start sync bitstream at wave pos: %s" %
                  bitstream.pformat_pos())
        bitstream.sync(32)  # Sync bitstream to wave sinus cycle

        #         test_bitstream = list(itertools.islice(bitstream, 258 * 8))
        #         print_bitlist(test_bitstream)

        log.debug("Searching for lead-in byte at wave pos: %s" %
                  bitstream.pformat_pos())

        # Searching for lead-in byte
        lead_in_pattern = list(
            codepoints2bitstream(self.cfg.LEAD_BYTE_CODEPOINT))
        max_pos = self.cfg.LEAD_BYTE_LEN * 8
        try:
            leader_pos = find_iter_window(bitstream, lead_in_pattern, max_pos)
        # "except X as err" works on Python 2.6+ and 3; "except X, err" is
        # Python 2-only syntax.
        except MaxPosArraived as err:
            log.error(
                "Error: Leader-Byte '%s' (%s) not found in the first %i Bytes! (%s)"
                %
                (list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                 self.cfg.LEAD_BYTE_LEN, err))
        except PatternNotFound as err:
            log.error(
                "Error: Leader-Byte '%s' (%s) doesn't exist in bitstream! (%s)"
                % (list2str(lead_in_pattern), hex(
                    self.cfg.LEAD_BYTE_CODEPOINT), err))
Beispiel #8
0
    def sync_bitstream(self, bitstream):
        """
        Sync the bitstream and search it for the lead-in byte pattern.

        Calls ``bitstream.sync(32)``, then looks for the bit pattern of
        ``self.cfg.LEAD_BYTE_CODEPOINT`` with ``find_iter_window``, limited
        to ``self.cfg.LEAD_BYTE_LEN * 8`` positions. Hitting that limit is
        logged as an error and not re-raised.

        :param bitstream: bit source providing ``sync()`` and
            ``pformat_pos()``
        """
        log.debug("start sync bitstream at wave pos: %s" %
                  bitstream.pformat_pos())
        bitstream.sync(32)  # Sync bitstream to wave sinus cycle

        #         test_bitstream = list(itertools.islice(bitstream, 258 * 8))
        #         print_bitlist(test_bitstream)

        log.debug("Searching for lead-in byte at wave pos: %s" %
                  bitstream.pformat_pos())

        # Searching for lead-in byte
        lead_in_pattern = list(
            codepoints2bitstream(self.cfg.LEAD_BYTE_CODEPOINT))
        max_pos = self.cfg.LEAD_BYTE_LEN * 8
        try:
            leader_pos = find_iter_window(bitstream, lead_in_pattern, max_pos)
        # "except X as err" works on Python 2.6+ and 3; "except X, err" is
        # Python 2-only syntax.
        except MaxPosArraived as err:
            log.error(
                "Error: Leader-Byte '%s' (%s) not found in the first %i Bytes! (%s)"
                %
                (list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                 self.cfg.LEAD_BYTE_LEN, err))
Beispiel #9
0
        lead_in_pattern = list(codepoints2bitstream(self.cfg.LEAD_BYTE_CODEPOINT))
        max_pos = self.cfg.LEAD_BYTE_LEN * 8
        try:
            leader_pos = find_iter_window(bitstream, lead_in_pattern, max_pos)
        except MaxPosArraived, err:
            log.error("Error: Leader-Byte '%s' (%s) not found in the first %i Bytes! (%s)" % (
                list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                self.cfg.LEAD_BYTE_LEN, err
            ))
        except PatternNotFound, err:
            log.error("Error: Leader-Byte '%s' (%s) doesn't exist in bitstream! (%s)" % (
                list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT), err
            ))
        else:
            log.info("Leader-Byte '%s' (%s) found at %i Bytes (wave pos: %s)" % (
                list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                leader_pos, bitstream.pformat_pos()
            ))

        log.debug("Search for sync-byte at wave pos: %s" % bitstream.pformat_pos())

        # Search for sync-byte
        sync_pattern = list(codepoints2bitstream(self.cfg.SYNC_BYTE_CODEPOINT))
        max_search_bits = self.cfg.MAX_SYNC_BYTE_SEARCH * 8
        try:
            sync_pos = find_iter_window(bitstream, sync_pattern, max_search_bits)
        except MaxPosArraived, err:
            raise SyncByteNotFoundError(
                "Error: Sync-Byte '%s' (%s) not found in the first %i Bytes! (%s)" % (
                    list2str(sync_pattern), hex(self.cfg.SYNC_BYTE_CODEPOINT),
                    self.cfg.MAX_SYNC_BYTE_SEARCH, err
Beispiel #10
0
def getData(url, source, maxurls, src):
    """
    Collect keyword-matching articles from a news site.

    Builds a ``newspaper`` source for ``url`` and walks at most ``maxurls``
    of its articles. An article is skipped when its URL is already in the
    URL cache or does not contain ``src`` between two non-word characters.
    After download and parsing it is also skipped when its title is
    'Error', its text is shorter than 10 characters, or
    ``U.findKeywordsInText`` finds no keywords. Errors raised while
    processing one article are printed and logged, and the loop continues.
    The URL cache is written back through ``U.cacheUrls`` at the end.

    :param url: site URL handed to ``newspaper.build``
    :param source: value stored under the 'source' key of every record
    :param maxurls: maximum number of articles to look at; ``None`` means
        all of them
    :param src: short source identifier used for the URL cache, the log
        file name and the URL filter
    :return: list of dicts with the keys url, top_image, publish_date,
        authors, nlp_keywords, keywords, title, source, score and summary
    """
    paper = newspaper.build(url, memoize_articles=False)
    length = len(paper.articles)
    uberdata = []
    urlcache = U.getUrlCache(src)
    logfile = U.getLogdir() + "/" + src + ".log"

    U.logger(logfile, "Begin processing %s..." % src)

    if maxurls is None or maxurls > length:
        maxurls = length

    # Set mirror of the cache: O(1) membership tests instead of scanning the
    # list for every article. It covers URLs cached by earlier runs as well
    # as URLs added during this run, so one check replaces the two identical
    # ones the loop used to perform.
    seen_urls = set(urlcache)

    # Raw strings avoid the invalid "\W" string escape; re.escape keeps any
    # regex metacharacter in src (e.g. ".") literal.
    src_pattern = re.compile(r'\W' + re.escape(src) + r'\W')

    for count, article in enumerate(paper.articles[0:maxurls], 1):
        # print(...) with one pre-formatted string works on Python 2 and 3.
        print("[%d/%d] %s" % (count, length, article.url))
        U.logger(logfile, "[%d/%d] URL: %s" % (count, length, article.url))

        # If article.url is already cached or was seen earlier in this run...
        if article.url in seen_urls:
            continue

        # If story is linked from an outside source...
        if not src_pattern.search(article.url):
            continue

        urlcache.append(article.url)
        seen_urls.add(article.url)
        try:
            article.download()
            article.parse()

            title = article.title
            if title == 'Error':
                continue

            text = article.text
            if len(text) < 10:
                continue

            (keywords, score) = U.findKeywordsInText(text)
            if len(keywords) == 0:
                continue

            if article.publish_date is not None:
                publish_date = article.publish_date.strftime("%B %d, %Y")
            else:
                publish_date = ''

            # Must run before article.keywords is read below.
            article.nlp()

            # The summary is the article text cut to the configured length.
            summary = text[0:U.getMaxSummaryLen()]

            uberdata.append({
                'url': article.url,
                'top_image': article.top_image,
                'publish_date': publish_date,
                'authors': article.authors,
                'nlp_keywords': U.list2str(article.keywords),
                'keywords': U.list2str(keywords),
                'title': title,
                'source': source,
                'score': score,
                'summary': summary,
            })
        except Exception:
            # Best effort per article: report and move on. Unlike a bare
            # "except:", this lets KeyboardInterrupt/SystemExit propagate.
            trace = traceback.format_exc()
            print(trace)
            U.logger(logfile, trace)

    U.cacheUrls(urlcache, src)
    U.logger(logfile, "Done.")

    print("### DEFAULT DATA GRABBER ###")

    return uberdata
Beispiel #11
0
def getData(url, source, maxurls):
    """
    Collect keyword-matching articles from CNN.

    Builds an English ``newspaper`` source for ``url``, filters its
    articles through ``removeUnwantedUrls`` and walks at most ``maxurls``
    of them. An article is skipped when its URL is already in the 'cnn'
    URL cache, when its title is 'Error', when its text is shorter than 10
    characters, or when ``U.findKeywordsInText`` finds no keywords. Errors
    raised while processing one article are printed and logged, and the
    loop continues. The URL cache is written back through ``U.cacheUrls``
    at the end.

    :param url: site URL handed to ``newspaper.build``
    :param source: value stored under the 'source' key of every record
    :param maxurls: maximum number of articles to look at; ``None`` means
        all of them
    :return: list of dicts with the keys url, top_image, publish_date,
        authors, nlp_keywords, keywords, title, source, score and summary
    """
    src = 'cnn'
    paper = newspaper.build(url, memoize_articles=False, language='en')
    uberdata = []
    urlcache = U.getUrlCache(src)
    logfile = U.getLogdir() + "/" + src + ".log"

    U.logger(logfile, "Begin processing %s..." % src)

    articles = removeUnwantedUrls(paper.articles)
    length = len(articles)

    if maxurls is None or maxurls > length:
        maxurls = length

    # Set mirror of the cache: O(1) membership tests instead of scanning the
    # list for every article. It covers URLs cached by earlier runs as well
    # as URLs added during this run, so one check replaces the two identical
    # ones the loop used to perform.
    seen_urls = set(urlcache)

    for count, article in enumerate(articles[0:maxurls], 1):
        # print(...) with one pre-formatted string works on Python 2 and 3.
        print("[%d/%d] %s" % (count, length, article.url))
        U.logger(logfile, "[%d/%d] URL: %s" % (count, length, article.url))

        # If article.url is already cached or was seen earlier in this run...
        if article.url in seen_urls:
            continue

        urlcache.append(article.url)
        seen_urls.add(article.url)
        try:
            article.download()
            article.parse()

            title = article.title
            if title == 'Error':
                continue

            text = article.text
            if len(text) < 10:
                continue

            (keywords, score) = U.findKeywordsInText(text)
            if len(keywords) == 0:
                continue

            if article.publish_date is not None:
                publish_date = article.publish_date.strftime("%B %d, %Y")
            else:
                publish_date = ''

            # Must run before article.keywords is read below.
            article.nlp()

            # TODO: This needs improvement
            # Keep the piece right after the first '(CNN)' marker when there
            # is one, otherwise the whole text.
            pieces = text.split('(CNN)')
            summary = pieces[1] if len(pieces) > 1 else pieces[0]
            summary = summary[0:U.getMaxSummaryLen()]

            uberdata.append({
                'url': article.url,
                'top_image': article.top_image,
                'publish_date': publish_date,
                'authors': article.authors,
                'nlp_keywords': U.list2str(article.keywords),
                'keywords': U.list2str(keywords),
                'title': title,
                'source': source,
                'score': score,
                'summary': summary,
            })
        except Exception:
            # Best effort per article: report and move on. Unlike a bare
            # "except:", this lets KeyboardInterrupt/SystemExit propagate.
            trace = traceback.format_exc()
            print(trace)
            U.logger(logfile, trace)

    U.cacheUrls(urlcache, src)
    U.logger(logfile, "Done.")

    print("### CNN DATA GRABBER ###")

    return uberdata
Beispiel #12
0
def print_block_table(block_codepoints):
    """
    Print one table row per block of bits.

    Each row shows the block rendered by ``list2str``, the hex and decimal
    value returned by ``bits2codepoint`` and the ``repr`` of the matching
    character.

    :param block_codepoints: iterable of bit blocks accepted by
        ``bits2codepoint`` and ``list2str``
    """
    for block in block_codepoints:
        byte_no = bits2codepoint(block)
        character = chr(byte_no)
        # print(...) with one pre-formatted string behaves the same on
        # Python 2 and also parses on Python 3, unlike the print statement.
        print("%s %4s %3s %s" % (list2str(block), hex(byte_no), byte_no,
                                 repr(character)))
Beispiel #13
0
            leader_pos = find_iter_window(bitstream, lead_in_pattern, max_pos)
        except MaxPosArraived, err:
            log.error(
                "Error: Leader-Byte '%s' (%s) not found in the first %i Bytes! (%s)"
                %
                (list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                 self.cfg.LEAD_BYTE_LEN, err))
        except PatternNotFound, err:
            log.error(
                "Error: Leader-Byte '%s' (%s) doesn't exist in bitstream! (%s)"
                % (list2str(lead_in_pattern), hex(
                    self.cfg.LEAD_BYTE_CODEPOINT), err))
        else:
            log.info(
                "Leader-Byte '%s' (%s) found at %i Bytes (wave pos: %s)" %
                (list2str(lead_in_pattern), hex(self.cfg.LEAD_BYTE_CODEPOINT),
                 leader_pos, bitstream.pformat_pos()))

        log.debug("Search for sync-byte at wave pos: %s" %
                  bitstream.pformat_pos())

        # Search for sync-byte
        sync_pattern = list(codepoints2bitstream(self.cfg.SYNC_BYTE_CODEPOINT))
        max_search_bits = self.cfg.MAX_SYNC_BYTE_SEARCH * 8
        try:
            sync_pos = find_iter_window(bitstream, sync_pattern,
                                        max_search_bits)
        except MaxPosArraived, err:
            raise SyncByteNotFoundError(
                "Error: Sync-Byte '%s' (%s) not found in the first %i Bytes! (%s)"
                % (list2str(sync_pattern), hex(self.cfg.SYNC_BYTE_CODEPOINT),