コード例 #1
0
ファイル: get_toc.py プロジェクト: fdq09eca/cs-project
def _get_page_by_page_title_search(pdfplumber_obj, keywords_pattern=None, verbose=False) -> list:
    '''
    Scan every page for title-like text matching a pattern and return the
    matching page indices grouped into tuples.

    Args:
        pdfplumber_obj: object exposing an iterable ``pages`` attribute; each
            page is passed to ``get_title_liked_txt``.
        keywords_pattern: regex handed to ``search_pattern_from_txt``. When
            None, a default is used that requires "report" and "auditor" and
            rejects text containing "internal" (case handling is up to
            ``search_pattern_from_txt`` -- not visible here).
        verbose: when True, print progress and every match to stdout.

    Returns:
        A list of tuples built from ``consecutive_int_list(unique(pages))``,
        where page numbers are the 0-based positions from ``enumerate``.
    '''
    if verbose:
        print('searching by page!')
    if keywords_pattern is None:
        keywords_pattern = r'^(?!.*internal)(?=.*report).*auditor.*$'
    pages = []
    for p, page in enumerate(pdfplumber_obj.pages):
        if verbose:
            print(f'searching p.{p}')
        try:
            title_alike_txts = get_title_liked_txt(page)
        except KeyError:
            # get_title_liked_txt raised KeyError for this page; it is logged
            # as non-textual and skipped instead of aborting the whole scan.
            logging.warning('Non textual page')
            continue
        for txt in title_alike_txts:
            if search_pattern_from_txt(txt, keywords_pattern):
                # A page may be appended more than once when several of its
                # texts match; unique() below is applied before grouping.
                pages.append(p)
                if verbose:
                    print(f'with pattern: found {txt} on p.{p}!')
    consecutive_pages = [tuple(li) for li in consecutive_int_list(unique(pages))]
    return consecutive_pages
コード例 #2
0
    def search_outline_in_pages(self, pattern, page_range=None, size='fontname', verbose=False, show_matched=False) -> list:
        '''
        Look for ``pattern`` in the title-like text of pages and return the
        matching page numbers grouped into tuples.

        Args:
            pattern: regex handed to ``search_pattern_from_txt``.
            page_range: optional iterable of indices into ``pdf.pages``; when
                falsy, every page is searched.
            size: forwarded to ``get_title_liked_txt`` as its ``size`` keyword.
            verbose: when True, print each match.
            show_matched: when True, also return the matched texts.

        Returns:
            A list of tuples built from ``consecutive_int_list(unique(...))``
            over the matching ``page.page_number - 1`` values; when
            ``show_matched`` is set, a ``(that list, matched texts)`` pair.
        '''
        print('search by page!')
        hit_pages = set()
        hits = []
        with _by_pdfplumber(self.pdf_obj) as pdf:
            selected = [pdf.pages[i] for i in page_range] if page_range else pdf.pages

            for page in selected:
                page_idx = page.page_number - 1

                try:
                    candidates = get_title_liked_txt(page, size=size)
                except KeyError:
                    # Logged as a non-textual page and skipped.
                    logging.warning('Non textual page')
                    continue

                for txt in candidates:
                    if not search_pattern_from_txt(txt, pattern):
                        continue
                    hit_pages.add(page_idx)
                    hits.append(txt)
                    if verbose:
                        print(f'with pattern: found {txt} on p.{page_idx}!')

            runs = [tuple(run) for run in consecutive_int_list(unique(hit_pages))]
            return (runs, hits) if show_matched else runs
コード例 #3
0
ファイル: get_toc.py プロジェクト: fdq09eca/cs-project
def _get_page_by_outline(toc, title_pattern, to_page=True) -> list:
    '''
    Collect the pages of every outline entry whose title matches
    ``title_pattern`` (case-insensitive ``re.search``).

    Args:
        toc: mapping of outline title -> page range; indexes 0 and 1 of the
            range are read as the inclusive first and last page.
        title_pattern: regex searched in each outline title.
        to_page: accepted but not used by this function.

    Returns:
        A list of tuples built from ``consecutive_int_list(unique(pages))``.
    '''
    matched_ranges = []
    for outline, page_range in toc.items():
        if re.search(title_pattern, outline, flags=re.IGNORECASE):
            # Expand the inclusive (first, last) pair into every page number.
            matched_ranges.append(list(range(page_range[0], page_range[1] + 1)))
    pages = flatten(matched_ranges)
    return [tuple(run) for run in consecutive_int_list(unique(pages))]
コード例 #4
0
ファイル: id_0049.py プロジェクト: CGenie/project_euler
def is_unusual(num):
    """Return True when the prime digit-permutations of a prime ``num``
    contain an arithmetic sequence.

    Only permutations that are prime and still have four digits (i.e. no
    leading zero was dropped by ``int``) are kept. The de-duplicated, sorted
    candidates are then handed to ``contains_arithmetic_sequence(..., 3)``.
    Returns False when ``num`` itself is not prime.
    """
    if not is_prime(num):
        return False

    digits = list(str(num))
    candidates = []
    for perm in permutations(digits):
        value = int(''.join(perm))
        if is_prime(value) and len(str(value)) == 4:
            candidates.append(value)

    candidates = unique(candidates)
    candidates.sort()

    if len(candidates) < 3:
        return False
    if contains_arithmetic_sequence(candidates, 3):
        return True
    return False
コード例 #5
0
 def search_outline_in_toc(self, pattern) -> list:
     '''
     Collect the pages of every ``self.toc`` entry whose outline title
     matches ``pattern`` (case-insensitive ``re.search``).

     Each matching entry's value is unpacked as ``(from_page, to_page)`` and
     expanded to the inclusive list of page numbers.

     Returns:
         A list of tuples built from ``consecutive_int_list(unique(pages))``.
     '''
     print('search by toc!')
     page_lists = []

     for outline, bounds in self.toc.items():
         if not re.search(pattern, outline, flags=re.IGNORECASE):
             continue
         first, last = bounds
         page_lists.append(list(range(first, last + 1)))

     flat_pages = flatten(page_lists)
     return [tuple(run) for run in consecutive_int_list(unique(flat_pages))]
コード例 #6
0
ファイル: id_0003.py プロジェクト: CGenie/project_euler
def prime_factors(n):
    """Return ``unique`` applied to ``prime_factors_non_unique(n)``."""
    factors_with_repeats = prime_factors_non_unique(n)
    return unique(factors_with_repeats)
コード例 #7
0
# Look up the rows of table `app` for the store at (slat, slong); create the
# store when nothing is found, then collect one column from every row.
#
# NOTE(review): the query is assembled by string concatenation, so any quote
# character in slat/slong ends up in the SQL text. If these values can come
# from user input this is an SQL-injection risk -- switch to the driver's
# parameterized form (the placeholder style depends on the DB-API driver in
# use, which is not visible here).
com_str = "SELECT * FROM app WHERE store_lat='"+slat+"' AND store_long='"+slong+"';"
cur.execute(com_str)

#cur.execute("SELECT * FROM app")
rows = cur.fetchall()

# No matching rows: create_new_store is expected to return the rows to use.
# NOTE(review): `rows==[]` is only true for an empty *list*; confirm the
# driver's fetchall() does not return an empty tuple.
if rows==[]:
    rows = create_new_store(cur,name,slat,slong)


# Collect the fifth column (index 4) of every row and echo each row.
# Presumably this is the `area` column -- TODO confirm against the table schema.
area = []
for r in rows:
    area.append(r[4])
    print(r)

# `unique` here returns two values; the second is bound to `ind`.
area,ind = unique(area)
#print(area)
#print(ind)

# Commit the transaction, then release the cursor and the connection.
con.commit()
cur.close()
con.close()


# <article class="media content-section">
#     <div class="media-body">
#       <p class="article-content">{{ r }}</p>
#     </div>
# </article>

# cur.execute("insert into app (store_id,store_name,area,item,if_there) values ('42.350903-71.114086', 'Target', 'bathroom', 'towel', '0'); \