예제 #1
0
                    continue
                if id_ in ids:
                    print("duplicate ")
                    continue
                date_nep = data["date_nepali"].split()[:3]
                yy = "".join([str(digit(c)) for c in date_nep[0]])
                if int(yy) < 2076:
                    print("filtered out article from year ", yy)
                    continue
                dd = "".join([str(digit(c)) for c in date_nep[2]])
                mm = month_map[date_nep[1]]
                nd = NepaliDate(yy, mm, dd, lang='nep')

                # print(date_nep, nd)
                # sys.exit(-1)
                data["date_english"] = nd.to_english_date().strftime(
                    "%Y/%-m/%d")
                data["title"] = data["title"].replace(u'\xa0', u' ')
                data["subtitle"] = data["subtitle"].replace(u'\xa0', u' ')
                data["description"] = data["description"].replace(
                    u'\xa0', u' ')
                data["source"] = "onlinekhabar"
                data["category"] = data["category"].split("/")[-1]
                # data['category'] = cat_map[data["category"].split("/")[-1]]
                cats.add(data["category"])
                outfile.write(json.dumps(data) + "\n")
                ids.add(id_)
                cnt += 1
                if cnt % 100 == 0:
                    print("processed ", cnt, " articles")
            except Exception as ex:
                print(ex, date_nep, nd)
예제 #2
0
 with open(f, 'r') as fp:
     for line in fp:
         data = json.loads(line)
         try:
             date_nep = data["date_nepali"].split(",")[1:3]
             # print(nd)
             yy = "".join([str(digit(c)) for c in date_nep[1][1:]])
             if int(yy) < 2076:
                 print("filtered out article from year ", yy)
                 continue
             dd = "".join(
                 [str(digit(c)) for c in date_nep[0].strip().split(' ')[1]])
             mm = month_map[date_nep[0].strip().split(' ')[0]]
             nd = NepaliDate(yy, mm, dd, lang='nep')
             # print(yy, mm, dd, nd, , )
             d_eng = nd.to_english_date()
             month = d_eng.month
             id_ = data["id"]
             if month not in {8, 9, 10}:
                 print("filtered out article ", data["url"])
                 continue
             if id_ in ids:
                 print("duplicate ")
                 continue
             data["date_english"] = d_eng.strftime("%Y/%-m/%d")
             data["title"] = data["title"].replace(u'\xa0', u' ')
             data["subtitle"] = data["subtitle"].replace(u'\xa0', u' ')
             data["description"] = data["description"].replace(
                 u'\xa0', u' ')
             data["source"] = "setopati"
             data["category"] = data["category"]
예제 #3
0
def convert_to_english(date):
    """Convert a dash-separated Nepali date string to its English date.

    Args:
        date: Date string of the form ``"YYYY-MM-DD"``. Whitespace around
            each component is ignored; components that are not purely
            numeric are dropped before year/month/day are read.

    Returns:
        The value returned by ``NepaliDate(year, month, day).to_english_date()``.

    Raises:
        ValueError: If ``date`` does not contain exactly three numeric
            components.
    """
    # Strip each piece first: str.isdigit() is False for "12\n" or " 05",
    # which would otherwise make a valid component silently disappear.
    pieces = (piece.strip() for piece in date.split("-"))
    numbers = [int(piece) for piece in pieces if piece.isdigit()]

    # Fail with a message that names the input instead of relying on the
    # generic tuple-unpacking error.
    if len(numbers) != 3:
        raise ValueError(
            "expected a 'YYYY-MM-DD' date with three numeric parts, "
            "got {!r}".format(date)
        )

    year, month, day = numbers
    nepali_date = NepaliDate(year, month, day)
    return nepali_date.to_english_date()