import csv
import datetime
import glob
import os
import re

from add_article import make_article_list
# The project-local imports below are inferred from how the names are used in
# this module; make_ga_csv_list() and check_no_mod_page() are also referenced
# further down but were defined elsewhere in the original source.
import check_mod_date
import file_upload
import query_check_and_make_html
import reibun.main_info


def change_pk_dic():
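    """One-off data fix: re-key the article stored under 146 to 144 in 'main_data'."""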
    pk_dic = make_article_list.read_pickle_pot('main_data')
    # print(pk_dic)
    pk_dic[144] = pk_dic[146]
    del pk_dic[146]
    print(pk_dic)
    make_article_list.save_data_to_pickle(pk_dic, 'main_data')


def make_side_bar_article_list(list_length, pd):
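    """Pick sidebar article IDs two ways, up to list_length each: 'pop by
    click' from the exported ページ.csv report, and 'important by reading
    time' from the analytics CSV, skipping IDs already chosen by click;
    both lists are printed.
    """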
    click_list = []
    time_list = []
    pk_dec = make_article_list.read_pickle_pot('main_data', pd)
    with open('/Users/nakataketetsuhiko/Downloads/https___www/ページ.csv') as f:
        reader = csv.reader(f)
        csv_list = [row for row in reader]
    c_list = [y for y in csv_list[1:] if '#' not in y[0] and '/amp/' not in y[0]]
    print('pop by click')
    for click_a in c_list:
        if '/pc/' in click_a[0]:
            path = click_a[0].replace('https://www.demr.jp/pc/', '')
            for pk_id in pk_dec:
                if pk_dec[pk_id]['file_path'] == path:
                    click_list.append(pk_id)
                    print('{} : {}, {}'.format(path, str(pk_id), pk_dec[pk_id]['title']))
                    break  # one match is enough; mirrors the break in the time loop below
        if len(click_list) == list_length:
            break
    ga_list = make_ga_csv_list()
    t_list = [x for x in ga_list if '#' not in x[0] and '/pc/' not in x[0] and int(x[1].replace(',', '')) >= 100]
    t_list.sort(key=lambda x: datetime.datetime.strptime(x[3], '%H:%M:%S'), reverse=True)
    print('\nimportant by reading time')
    for t_art in t_list:
        t_path = t_art[0].replace('/amp/', '')
        for pk_i in pk_dec:
            if pk_dec[pk_i]['file_path'] == t_path and pk_i not in click_list:
                time_list.append(pk_i)
                print('{} : {}, {}, {}'.format(t_path, str(pk_i), pk_dec[pk_i]['title'], t_art[3]))
                break
        if len(time_list) == list_length:
            break
    print('\n')
    print(time_list)
    print(click_list)


def insert_id_and_category_to_html():
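    """Stamp each PC HTML file with <!--id_num_N--> and <!--category_X-->
    comments placed right after <head>.
    """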
    pk_dic = make_article_list.read_pickle_pot('main_data')
    for id_p in pk_dic:
        html_path = 'reibun/html_files/pc/' + pk_dic[id_p]['file_path']
        with open(html_path, 'r', encoding='utf-8') as f:
            long_str = f.read()
        long_str = long_str.replace('<head><!-- Global site tag (gtag.js) -->',
                                    '<head><!--id_num_' + str(id_p) + '--><!--category_' + pk_dic[id_p]['category']
                                    + '--><!-- Global site tag (gtag.js) -->')
        with open(html_path, 'w', encoding='utf-8') as g:
            g.write(long_str)


def make_mod_date_list(pd):
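    """Reduce 'modify_log' to the newest entry per file and pickle the
    resulting [path, date] pairs as 'mod_date_list' (newest first).
    """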
    seen = []
    mod_list = []
    mod_log = make_article_list.read_pickle_pot('modify_log', pd)
    mod_log.reverse()
    for log in mod_log:
        if log[0] not in seen:
            mod_list.append([log[0].replace('reibun', ''), log[1]])
            seen.append(log[0])
    make_article_list.save_data_to_pickle(mod_list, 'mod_date_list', pd)


def count_title_str_num(len_dec, pd):
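    """Print articles whose title is under 20 characters (policy pages
    excluded), together with their len_dec entry, and return the matching
    file paths.
    """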
    result = []
    print('タイトル文字数チェック')  # "title character-count check"
    p_data = make_article_list.read_pickle_pot('main_data', pd)
    # print(p_data)
    short_title_l = [[len(p_data[x]['title']), p_data[x]['file_path'], p_data[x]['title']] for x in p_data
                     if len(p_data[x]['title']) < 20]
    for y in sorted(short_title_l):
        if 'policy' not in y[1]:
            print(str(y[0]) + ' : {}  {}  {}'.format(y[1], y[2], len_dec['/pc/' + y[1]]))
            result.append(y[1])
    return result


def search_update_file(pd):
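    """Find files under html_files/ modified since the 'last_md_mod'
    timestamp (skipping copies, tests, templates and .cgi files) and
    upload them via scp.
    """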
    last_upload = make_article_list.read_pickle_pot('last_md_mod', pd)
    all_files = [
        x for x in glob.glob(pd['project_dir'] + '/html_files/**/**',
                             recursive=True) if '_copy' not in x
        and '_test' not in x and '/template' not in x and '.cgi' not in x
    ]
    update_files = [
        x for x in list(set(all_files))
        if os.path.getmtime(x) > last_upload and os.path.isfile(x)
    ]
    print(update_files)
    file_upload.scp_upload(update_files, pd)


def insert_pub_date():
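    """Copy each article's datePublished value out of its HTML into
    pk_dic; the save back to 'main_data' is left commented out below.
    """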
    pk_dic = make_article_list.read_pickle_pot('main_data')
    for id_p in pk_dic:
        print(pk_dic[id_p])
        with open('reibun/html_files/pc/' + pk_dic[id_p]['file_path'], 'r', encoding='utf-8') as f:
            long_str = f.read()
            pub_str_l = re.findall(r'<time itemprop="datePublished" datetime="(.+?)">', long_str)
            if pub_str_l:
                pub_str = pub_str_l[0]
            else:
                raise Exception('datePublished not found in ' + pk_dic[id_p]['file_path'])
            pk_dic[id_p]['pub_date'] = pub_str
    print(pk_dic)
    for id_q in pk_dic:
        print(pk_dic[id_q]['pub_date'])
    # make_article_list.save_data_to_pickle(pk_dic, 'main_data')


def insert_id_and_category_to_md(pd):
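    """Add an 'n::<id>' line after each Markdown file's 'd::' line and
    rewrite /reibun/pc/ links to /html_files/pc/.
    """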
    pk_dic = make_article_list.read_pickle_pot('main_data', pd)
    for id_p in pk_dic:
        md_path = 'reibun/md_files/pc/' + pk_dic[id_p]['file_path'].replace(
            '.html', '.md')
        if os.path.exists(md_path):
            with open(md_path, 'r', encoding='utf-8') as f:
                long_str = f.read()
            long_str = re.sub(r'(d::.*?\n)', r'\1n::' + str(id_p) + '\n',
                              long_str)
            long_str = long_str.replace('/reibun/pc/', '/html_files/pc/')
            # print(long_str)
            with open(md_path, 'w', encoding='utf-8') as g:
                g.write(long_str)
        else:
            print('no md_file : ' + pk_dic[id_p]['file_path'])


def make_new_main_data_pkl(pd):
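    """Migrate 'main_data' from positional lists to dicts with named fields."""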
    old_pkl = make_article_list.read_pickle_pot('main_data', pd)
    print(old_pkl)
    new_pkl = {}
    for x in old_pkl:
        new_pkl[x] = {
            'file_path': old_pkl[x][0],
            'title': old_pkl[x][1],
            'pub_date': old_pkl[x][2],
            'mod_date': old_pkl[x][3],
            'category': old_pkl[x][4],
            'description': old_pkl[x][5],
            'str_len': old_pkl[x][6],
            'layout_flag': old_pkl[x][7],
            'shift_flag': old_pkl[x][8]
        }
    make_article_list.save_data_to_pickle(new_pkl, 'main_data', pd)
    print(new_pkl)


def manual_add_modify_log(mod_file_path_list, pd):
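    """Log the given files as modified today in 'modify_log' (replacing any
    same-day entry) and rebuild the mod-date list.
    """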
    mod_log = make_article_list.read_pickle_pot('modify_log', pd)
    now = datetime.date.today()
    today_mod = [x[0] for x in mod_log if x[1] == str(now)]
    for mod_file_path in mod_file_path_list:
        with open(mod_file_path, 'r', encoding='utf-8') as f:
            long_str = f.read()
        title = re.findall(r'<title>(.+?)\|出会い系メール例文集</title>', long_str)[0]
        if mod_file_path not in today_mod:
            mod_log.append([mod_file_path, str(now), 'sitepage', title, 'mod'])
        else:
            for data in mod_log:
                if data[0] == mod_file_path and data[1] == str(now):
                    mod_log.remove(data)
                    mod_log.append(
                        [mod_file_path,
                         str(now), 'sitepage', title, 'mod'])
    make_article_list.save_data_to_pickle(mod_log, 'modify_log', pd)
    check_mod_date.make_mod_date_list(pd)


def next_update_target_search(aim_date, len_dec, pd):
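    """From the Search Console CSV for the last aim_date days, list pages
    that have not been modified within that window, ordered by clicks and
    by impressions; also return the raw report rows (re-sorted by clicks).
    """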
    i = 0
    # with open('/Users/nakataketetsuhiko/Downloads/https___www/ページ.csv') as f:
    #     reader = csv.reader(f)
    #     csv_list = [row for row in reader]
    # c_list = sc_data_match_page(csv_list)
    # c_list = [y for y in csv_list[1:] if '#' not in y[0] and '/amp/' not in y[0]]
    today = datetime.date.today()
    pj_dir, domain, main_dir, site_name, pj_domain_main = query_check_and_make_html.project_select(
        pd['project_dir'] + '/')
    end_date = query_check_and_make_html.make_target_data_for_today(
        today, aim_date, pj_dir, domain)
    if aim_date == 28:
        period_name = 'month'
    elif aim_date == 7:
        period_name = 'week'
    else:
        period_name = str(aim_date) + '_'
    with open('gsc_data/' + pj_dir + '/p_' + period_name + end_date +
              '.csv') as f:
        reader = csv.reader(f)
        csv_list = [row for row in reader]
    c_list = query_check_and_make_html.sc_data_match_page(csv_list, domain)
    mod_list = make_article_list.read_pickle_pot('mod_date_list', pd)
    limit_d = datetime.datetime.now() - datetime.timedelta(days=aim_date)
    for i in range(len(mod_list)):
        if datetime.datetime.strptime(mod_list[i][1], '%Y-%m-%d') < limit_d:
            break
    mod_list_n = mod_list[:i + 1]
    print('日数 : ' + str(aim_date) + '日間')  # "period: N days"
    print('クリック数順')  # "ordered by clicks"
    click_list = check_no_mod_page(mod_list_n, c_list, len_dec, pd)
    c_list.sort(key=lambda x: int(x[2]), reverse=True)
    print('表示回数順')  # "ordered by impressions"
    display_list = check_no_mod_page(mod_list_n, c_list, len_dec, pd)
    c_list.sort(key=lambda x: int(x[1]), reverse=True)
    return click_list, display_list, c_list


if __name__ == '__main__':
    # make_new_main_data_pkl()
    # change_pk_dic()
    # insert_pub_date()
    insert_id_and_category_to_html()
    print(make_article_list.read_pickle_pot('main_data'))
    p_d = reibun.main_info.info_dict
    pickle_dec = make_article_list.read_pickle_pot('main_data', p_d)
    # print(pickle_dec)
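    # Per-URL lookup tables ('/' + main_dir + file_path -> value); dicts of
    # this shape are what count_title_str_num() and next_update_target_search()
    # receive as len_dec.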
    str_len_dec = {
        '/' + p_d['main_dir'] + pickle_dec[x]['file_path']:
        pickle_dec[x]['str_len']
        for x in pickle_dec
    }
    layout_dec = {
        '/' + p_d['main_dir'] + pickle_dec[x]['file_path']:
        pickle_dec[x]['layout_flag']
        for x in pickle_dec
    }
    shift_dec = {
        '/' + p_d['main_dir'] + pickle_dec[x]['file_path']:
        pickle_dec[x]['shift_flag']
        for x in pickle_dec
    }
    print(
        make_article_list.read_pickle_pot('main_data',
                                          reibun.main_info.info_dict))