Code example #1
    def crawling(self, category_name):
        # Multi Process PID
        print(category_name + " PID: " + str(os.getpid()))

        writer = Writer(category_name=category_name, date=self.date)
        wcsv = writer.get_writer_csv()
        wcsv.writerow([
            "date", "time", "category", "company", "author", "headline",
            "sentence", "content_url", "image_url"
        ])

        # Article URL format
        url = "http://news.naver.com/main/list.nhn?mode=LSD&mid=sec&sid1=" + str(
            self.categories.get(category_name)) + "&date="

        # Collect articles for the year, month, and day given in self.date.
        day_urls = self.make_news_page_url(url, self.date['year'],
                                           self.date['month'],
                                           self.date['day'])
        print(category_name + " Urls are generated")
        print("The crawler starts")

        with concurrent.futures.ThreadPoolExecutor() as pool:
            futureWorkers = []
            for URL in day_urls:
                futureWorkers.append(
                    pool.submit(
                        self.get_page_and_write_row,
                        category_name,
                        writer,
                        URL,
                    ))
            for future in concurrent.futures.as_completed(futureWorkers):
                print(future.result())
        writer.close()
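The snippet assumes a Writer helper that owns a per-category CSV file and exposes get_writer_csv() and close(). A minimal sketch of such a class (the output directory and file-naming scheme are illustrative guesses, not the project's actual ones):

import csv
import os

class Writer:
    # Minimal stand-in: one CSV file per category, exposing a csv.writer.
    def __init__(self, category_name, date):
        os.makedirs("output", exist_ok=True)
        path = f"output/{category_name}_{date['year']}{date['month']}{date['day']}.csv"
        self.file = open(path, "w", encoding="utf-8", newline="")
        self.csv_writer = csv.writer(self.file)

    def get_writer_csv(self):
        return self.csv_writer

    def close(self):
        self.file.close()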
Code example #2
def total_content(Syear, Smonth, Sday, Fyear, Fmonth, Fday, unique_number):
    length = 0
    first = ""
    start_lst = list()
    finish_lst = list()

    with open(
            f"{PATH}/{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_TStatistic.csv",
            newline='') as csvfile:
        reader = csv.reader(csvfile)
        for i, line in enumerate(reader):
            length += 1
            if first == "":
                first = line[0]
                start_lst.append(i)
            else:
                if first != line[0]:
                    first = line[0]
                    start_lst.append(i)
                    finish_lst.append(i)
        finish_lst.append(length)

    final_lst = list()
    for i in range(len(start_lst)):
        dif = finish_lst[i] - start_lst[i]
        mine = dif // 10
        left = dif % 10
        start = start_lst[i] + mine * unique_number
        if left > unique_number:
            start += unique_number
        else:
            start += left
        if left > unique_number:
            mine += 1
        if mine != 0:
            final_lst.append((start, start + mine))

    setter = Setter(Syear, Smonth, Sday, Fyear, Fmonth, Fday, unique_number)
    with Pool(processes=50) as pool:
        a = pool.map(setter.multi_wrapper, final_lst)

    site_lst = list()
    for i in a:
        try:
            site = i[0][1].replace('/', '')
            site_lst.append(site)
            success_writer = Writer(
                f'{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_{site}url{unique_number}'
            )
            for j in i:
                success_writer.write_line(j)
            success_writer.close()
        except Exception:
            print(f"Fail {i}")
    writer = Writer(
        f'{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_check{unique_number}')
    writer.close()
    return site_lst
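The index arithmetic in the middle of the function is a standard near-equal partition: each contiguous group of rows is split into 10 slices, and slice unique_number is handed to this process. A self-contained restatement of that logic (function and variable names are mine, not the project's):

def slice_for_worker(start_idx, end_idx, worker_id, n_workers=10):
    # Split [start_idx, end_idx) into n_workers near-equal contiguous slices;
    # the first (length % n_workers) workers each take one extra row.
    base, extra = divmod(end_idx - start_idx, n_workers)
    start = start_idx + base * worker_id + min(worker_id, extra)
    size = base + (1 if worker_id < extra else 0)
    return start, start + size

# e.g. 23 rows across 10 workers: workers 0-2 get 3 rows, the rest get 2
print([slice_for_worker(0, 23, w) for w in range(10)])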
Code example #3
def split(Syear, Smonth, Sday, Fyear, Fmonth, Fday, threshold, setting):
    total_lst = list()
    target_lst = dict()
    startTime = datetime.now()

    if setting:
        total_dict = dict()
        for i in range(0, 10):
            with open(f"{PATH}/setting/setting{i}.json", "r",
                      encoding='utf-8') as csvfile:
                tmp_dict = json.load(csvfile)
                if i == 0:
                    total_dict = tmp_dict
                else:
                    for j in tmp_dict['application'].keys():
                        for k in tmp_dict['application'][j].keys():
                            total_dict['application'][j][k] = tmp_dict[
                                'application'][j][k]
            os.remove(f"{PATH}/setting/setting{i}.json")
        with open(f"{PATH}/setting/setting.json", 'w', encoding='utf-8') as f:
            json.dump(total_dict, f, ensure_ascii=False, indent=2)
        del total_dict

    for i in range(0, 10):
        with open(
                f"{PATH}/{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_Statistic{i}.csv",
                newline='') as csvfile:
            reader = csv.reader(csvfile)
            for line in reader:
                if int(line[2]) >= int(threshold) and line[1] not in except_lst:
                    site = line[0]
                    if site in target_lst:
                        target_lst[site].append(line)
                    else:
                        target_lst[site] = [line]
                total_lst.append(line)

        os.remove(
            f"{PATH}/{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_Statistic{i}.csv"
        )

    writer = Writer(f'{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_Statistic')
    for i in total_lst:
        writer.write_line(i)
    writer.close()

    writer = Writer(f'{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_TStatistic')
    for i in target_lst:
        for j in target_lst[i]:
            writer.write_line(j)
    writer.close()
    endTime = datetime.now()
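The target_lst bookkeeping (look up the site's list, append, store it back) is what collections.defaultdict does implicitly. A behavior-equivalent sketch, assuming the same CSV layout as above (column 0 = site, column 1 = key, column 2 = count):

import csv
from collections import defaultdict

def group_by_site(path, threshold, except_lst):
    # Group qualifying rows by site (column 0), mirroring target_lst above.
    groups = defaultdict(list)
    with open(path, newline='') as csvfile:
        for line in csv.reader(csvfile):
            if int(line[2]) >= int(threshold) and line[1] not in except_lst:
                groups[line[0]].append(line)
    return dict(groups)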
Code example #4
def train(nbSticks, nbGames, plotValueFunction, players, debugMode=False):
    writer = Writer() if debugMode else None
    game = Game(nbSticks, players, {
        ConfigKey.TRAIN: True,
        ConfigKey.DEBUG: debugMode
    })

    # stats for trained players
    valueFunctionStats = {}
    for p in (p for p in players if type(p) is TrainedPlayer):
        valueFunctionStats[p] = dict((i, []) for i in range(1, nbSticks + 1))

    print("\nTraining...")
    for i in range(0, nbGames):
        if writer:
            writer.beginGame(i, game.players[0], game.players[1])

        game.start()  # play a game

        if writer:
            writer.endGame(game.players[0], game.players[1])

        # update stats
        for p in valueFunctionStats:
            for s, v in p.v.items():
                valueFunctionStats[p][s].append(v)

        # decrease exploration over time
        if i % 10 == 0:
            for p in players:
                p.epsilon = max(p.epsilon * 0.996, 0.05)

        game.reset()

    if writer:
        writer.close()

    for p in (p for p in players if type(p) is TrainedPlayer):
        print(f'\n=== Value function of {p}: ===')
        PrettyPrinter().pprint(p.v)

    if plotValueFunction:
        plot(valueFunctionStats)
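The exploration schedule multiplies epsilon by 0.996 every 10 games and floors it at 0.05. A few standalone lines show how quickly that decays (assuming a starting epsilon of 1.0):

epsilon = 1.0
for game in range(10001):
    if game % 10 == 0:
        epsilon = max(epsilon * 0.996, 0.05)
    if game % 2000 == 0:
        print(f"game {game:5d}: epsilon = {epsilon:.3f}")
# The 0.05 floor is reached after roughly 7500 games (0.996**750 ≈ 0.049).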
Code example #5
def total_statistic(Syear, Smonth, Sday, Fyear, Fmonth, Fday, unique_number,
                    setting):
    length = 0
    first = ""
    start_lst = list()
    finish_lst = list()

    with open(PATH, newline='') as csvfile:
        reader = csv.reader(csvfile)
        for i, line in enumerate(reader):
            length += 1
            if first == "":
                first = line[0]
                start_lst.append(i)
            else:
                if first != line[0]:
                    first = line[0]
                    start_lst.append(i)
                    finish_lst.append(i)
        finish_lst.append(length)

    final_lst = list()
    for i in range(len(start_lst)):
        dif = finish_lst[i] - start_lst[i]
        mine = dif // 10
        left = dif % 10
        start = start_lst[i] + mine * unique_number
        if left > unique_number:
            start += unique_number
        else:
            start += left
        if left > unique_number:
            mine += 1
        if mine != 0:
            final_lst.append((start, start + mine))

    setter = Setter(Syear, Smonth, Sday, Fyear, Fmonth, Fday, setting)
    with Pool(processes=50) as pool:
        a = pool.map(setter.multi_wrapper, final_lst)

    success_writer = Writer(
        f'{Syear}{Smonth}{Sday}_{Fyear}{Fmonth}{Fday}_Statistic{unique_number}'
    )
    merged_dict = dict()
    log_dict = dict()
    for i in a:
        for j in i[0]:
            success_writer.write_line(j)
        if setting:
            merged_dict = {**merged_dict, **i[1]}
        if str(i[4]) in log_dict:
            log_dict[str(i[4])]['total'] += i[2]
            log_dict[str(i[4])]['fail'] += i[3]
        else:
            log_dict[str(i[4])] = {'total': i[2], 'fail': i[3]}

    success_writer.close()

    if setting:
        json_object = dict()
        json_object['last-modified'] = str(datetime.now().date())
        json_object['application'] = merged_dict
        with open(f"{PATH2}/setting{unique_number}.json",
                  'w',
                  encoding='utf-8') as f:
            json.dump(json_object, f, ensure_ascii=False, indent=2)

    return log_dict
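Judging from the indexing in the loop above (i[0] = rows to write, i[1] = settings dict, i[2] = total, i[3] = fail count, i[4] = site; this layout is inferred, not documented), the per-site tallying can be condensed with collections.defaultdict:

from collections import defaultdict

def tally_logs(results):
    # results: iterable of (rows, settings, total, fail, site) tuples,
    # matching the layout the loop above indexes into.
    log = defaultdict(lambda: {'total': 0, 'fail': 0})
    for _rows, _settings, total, fail, site in results:
        log[str(site)]['total'] += total
        log[str(site)]['fail'] += fail
    return dict(log)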
Code example #6
File: train.py  Project: pokyboy/SketchGNN
def run_train(train_params=None, test_params=None):
    opt = TrainOptions().parse(train_params)
    testopt = TestOptions().parse(test_params)
    testopt.timestamp = opt.timestamp
    testopt.batch_size = 30

    # model init
    model = SketchModel(opt)
    model.print_detail()

    writer = Writer(opt)

    # data load
    trainDataloader = load_data(opt,
                                datasetType='train',
                                permutation=opt.permutation,
                                shuffle=opt.shuffle)
    validDataloader = load_data(opt, datasetType='valid')
    testDataloader = load_data(opt, datasetType='test')

    # train epoches
    # with torchsnooper.snoop():
    ii = 0
    min_test_avgloss = float('inf')  # best validation loss seen so far
    min_test_avgloss_epoch = 0
    for epoch in range(opt.epoch):
        for i, data in enumerate(trainDataloader):
            model.step(data)

            if ii % opt.plot_freq == 0:
                writer.plot_train_loss(model.loss, ii)
            if ii % opt.print_freq == 0:
                writer.print_train_loss(epoch, i, model.loss)

            ii += 1

        model.update_learning_rate()
        if opt.plot_weights:
            writer.plot_model_wts(model, epoch)

        # test
        if epoch % opt.run_test_freq == 0:
            model.save_network('latest')
            loss_avg, P_metric, C_metric = run_eval(opt=testopt,
                                                    loader=validDataloader,
                                                    dataset='valid',
                                                    write_result=False)
            writer.print_test_loss(epoch, 0, loss_avg)
            writer.plot_test_loss(loss_avg, epoch)
            writer.print_eval_metric(epoch, P_metric, C_metric)
            writer.plot_eval_metric(epoch, P_metric, C_metric)
            if loss_avg < min_test_avgloss:
                min_test_avgloss = loss_avg
                min_test_avgloss_epoch = epoch
                print(
                    'saving the model at the end of epoch {} with best avgLoss {}'
                    .format(epoch, min_test_avgloss))
                model.save_network('bestloss')

    testopt.which_epoch = 'latest'
    testopt.metric_way = 'wlen'
    loss_avg, P_metric, C_metric = run_eval(opt=testopt,
                                            loader=testDataloader,
                                            dataset='test',
                                            write_result=False)

    testopt.which_epoch = 'bestloss'
    testopt.metric_way = 'wlen'
    loss_avg_2, P_metric_2, C_metric_2 = run_eval(opt=testopt,
                                                  loader=testDataloader,
                                                  dataset='test',
                                                  write_result=False)

    record_list = {
        'p_metric': round(P_metric * 100, 2),
        'c_metric': round(C_metric * 100, 2),
        'loss_avg': round(loss_avg, 4),
        'best_epoch': min_test_avgloss_epoch,
        'p_metric_2': round(P_metric_2 * 100, 2),
        'c_metric_2': round(C_metric_2 * 100, 2),
        'loss_avg_2': round(loss_avg_2, 4),
    }
    writer.train_record(record_list=record_list)
    writer.close()
    return record_list, opt.timestamp
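The evaluation loop keeps two checkpoints: 'latest' is refreshed on every test pass, and 'bestloss' is snapshotted whenever the running minimum improves. The pattern in isolation (the loss values and the commented save calls are illustrative stand-ins):

best_loss = float('inf')
best_epoch = 0
for epoch, val_loss in enumerate([0.9, 0.7, 0.8, 0.5, 0.6]):
    # model.save_network('latest')   # always refresh the rolling checkpoint
    if val_loss < best_loss:
        best_loss, best_epoch = val_loss, epoch
        # model.save_network('bestloss')  # snapshot the best model so far
print(best_epoch, best_loss)  # -> 3 0.5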
Code example #7
    count = 0
    if opcode.cc is not None:
        line += "<" + opcode.cc.value
        count += 1
    for operand in opcode.operands:
        line += (", " if count > 0 else "<") + operand.template_value()
        count += 1
    if count > 0:
        line += ">"

    line += "},"
    writer.write(line)

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("usage: %s <8086|x86> output_filename" % sys.argv[0])
        sys.exit(1)

    writer = Writer(sys.argv[2])
    if sys.argv[1] == "8086":
        MODULE = opcodes_8086
        DISPATCH_FUNCTION_NAME = "CPU_8086::Instructions::DispatchInstruction"
        gen_dispatch(writer)
    elif sys.argv[1] == "x86":
        MODULE = opcodes_x86
        DISPATCH_FUNCTION_NAME = "CPU_X86::Interpreter::Dispatch"
        gen_dispatch(writer)
        gen_handler_table(writer, "CPU_X86::Interpreter", "s_handler_functions")
    
    writer.close()
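The count-based comma bookkeeping that builds the <...> template argument list can be expressed with str.join. An equivalent sketch (plain strings stand in for opcode.cc.value and operand.template_value()):

def template_args(cc, operands):
    # Collect all template arguments first, then join them in one step.
    parts = ([cc] if cc is not None else []) + list(operands)
    return "<" + ", ".join(parts) + ">" if parts else ""

print(template_args("o", ["Eb", "Gb"]))  # -> <o, Eb, Gb>
print(template_args(None, []))           # -> (empty string)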
Code example #8
File: uitae.py  Project: 20-2-SKKU-OSS/2020-2-OSS-7
    def crawling(self, category_name):
        # Multi Process PID
        print(category_name + " PID: " + str(os.getpid()))

        writer = Writer(category_name=category_name, date=self.date)

        # Article URL format
        url = "http://news.naver.com/main/list.nhn?mode=LSD&mid=sec&sid1=" + str(
            self.categories.get(category_name)) + "&date="

        # Collect articles from start_year/start_month through end_year/end_month.
        day_urls = self.make_news_page_url(url, self.date['start_year'],
                                           self.date['end_year'],
                                           self.date['start_month'],
                                           self.date['end_month'])
        print(category_name + " Urls are generated")
        print("The crawler starts")

        for URL in day_urls:

            regex = re.compile(r"date=(\d+)")
            news_date = regex.findall(URL)[0]

            request = self.get_url_data(URL)

            document = BeautifulSoup(request.content, 'html.parser')

            # html - newsflash_body - type06_headline, type06
            # Fetch the article entries on each page
            post_temp = document.select(
                '.newsflash_body .type06_headline li dl')
            post_temp.extend(document.select('.newsflash_body .type06 li dl'))

            # Store the URL of every article on the page
            post = []
            for line in post_temp:
                # Append every article URL on this page to the post list
                post.append(line.a.get('href'))
            del post_temp

            for content_url in post:  # article URL
                # crawl delay between requests
                sleep(0.01)

                # Fetch the article HTML
                request_content = self.get_url_data(content_url)
                try:
                    document_content = BeautifulSoup(request_content.content,
                                                     'html.parser')
                except Exception:
                    continue

                try:
                    # Extract the article headline
                    tag_headline = document_content.find_all(
                        'h3', {'id': 'articleTitle'}, {'class': 'tts_head'})
                    text_headline = ArticleParser.clear_headline(
                        str(tag_headline[0].find_all(text=True)))
                    if not text_headline:  # skip the article if empty
                        continue

                    # Extract the article body
                    tag_content = document_content.find_all(
                        'div', {'id': 'articleBodyContents'})
                    text_sentence = ArticleParser.clear_content(
                        str(tag_content[0].find_all(text=True)))
                    if not text_sentence:  # skip the article if empty
                        continue

                    # Extract the press company
                    tag_company = document_content.find_all(
                        'meta', {'property': 'me2:category1'})
                    text_company = str(tag_company[0].get('content'))
                    if not text_company:  # skip the article if empty
                        continue

                    # Write the CSV row
                    wcsv = writer.get_writer_csv()
                    wcsv.writerow([
                        news_date, category_name, text_company, text_headline,
                        text_sentence, content_url
                    ])

                    del text_company, text_sentence, text_headline
                    del tag_company
                    del tag_content, tag_headline
                    del request_content, document_content

                except Exception as ex:  # e.g. UnicodeEncodeError
                    # wcsv.writerow([ex, content_url])
                    del request_content, document_content
        writer.close()
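Stripped of bookkeeping, the per-article loop is: fetch, parse, extract three fields, write a row. A minimal sketch of that core, using the same tags and libraries as above (error handling, rate limiting, and the CSV write are omitted):

import requests
from bs4 import BeautifulSoup

def extract_article(content_url):
    # Fetch one article page and pull out headline, body, and press company.
    response = requests.get(content_url, timeout=10)
    document = BeautifulSoup(response.content, 'html.parser')
    headline = document.find('h3', {'id': 'articleTitle'})
    body = document.find('div', {'id': 'articleBodyContents'})
    company = document.find('meta', {'property': 'me2:category1'})
    if not (headline and body and company):
        return None  # skip articles missing any field, as the crawler does
    return (headline.get_text(strip=True),
            body.get_text(strip=True),
            company.get('content'))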
Code example #9
class LoggerImpl(object):
    def __init__(self):
        self.TRACE = 0
        self.DEBUG = 1
        self.INFO = 2
        self.WARN = 3
        self.ERROR = 4
        self.FATAL = 5
        self.REPORT = 6
        self.__all_log_level = {
            'trace': self.TRACE,
            'debug': self.DEBUG,
            'info': self.INFO,
            'warn': self.WARN,
            'error': self.ERROR,
            'fatal': self.FATAL,
            'report': self.REPORT,
        }
        self.__process = None
        self.__report = None
        self.__current_log_level = self.INFO

    def init(self,
             target,
             file_name,
             file_size=100 * 1024 * 1024,
             max_file_count=-1,
             multiprocess=False):
        try:
            if multiprocess:
                lock = multiprocessing.RLock()
            else:
                lock = threading.RLock()

            if not os.path.exists(target):
                os.makedirs(target)
            if not os.path.exists(target):
                return False

            self.__process = Writer(lock, target, file_name + '.process.log',
                                    file_size, max_file_count)
            if not self.__process.open():
                return False
            self.__report = Writer(lock, target, file_name + '.report.log',
                                   file_size, max_file_count)
            if not self.__report.open():
                return False

            if not SysLog.open():
                return False
            return True
        except Exception:
            print(traceback.format_exc())

    def set_level(self, level):
        if isinstance(level, int):
            if level < self.TRACE or level > self.REPORT:
                return False
            self.__current_log_level = level
            return True

        level_value = self.__all_log_level.get(level, None)
        if level_value is None:
            return False
        self.__current_log_level = level_value
        return True

    def close(self):
        try:
            if self.__process:
                self.__process.close()
                self.__process = None

            if self.__report:
                self.__report.close()
                self.__report = None

            SysLog.close()
        except Exception:
            print(traceback.format_exc())

    def trace(self, content):
        try:
            if self.TRACE < self.__current_log_level:
                return
            self.__process.write(content)
        except:
            SysLog.error(traceback.format_exc())

    def debug(self, content):
        try:
            if self.DEBUG < self.__current_log_level:
                return
            self.__process.write(content)
        except:
            SysLog.error(traceback.format_exc())

    def info(self, content):
        try:
            if self.INFO < self.__current_log_level:
                return
            self.__process.write(content)
        except:
            SysLog.error(traceback.format_exc())

    def warn(self, content):
        try:
            if self.WARN < self.__current_log_level:
                return
            self.__process.write(content)
        except:
            SysLog.error(traceback.format_exc())

    def error(self, content):
        try:
            if self.ERROR < self.__current_log_level:
                return
            self.__process.write(content)
        except:
            SysLog.error(traceback.format_exc())

    def fatal(self, content):
        try:
            if self.FATAL < self.__current_log_level:
                return
            self.__process.write(content)
        except:
            SysLog.error(traceback.format_exc())

    def report(self, content):
        try:
            if self.REPORT < self.__current_log_level:
                return
            self.__report.write(content)
        except:
            SysLog.error(traceback.format_exc())
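Typical usage, given the interface above (the directory and file name are illustrative):

logger = LoggerImpl()
if logger.init(target='./logs', file_name='myapp', multiprocess=False):
    logger.set_level('debug')          # accepts a level name or number
    logger.debug('connection opened')  # written to myapp.process.log
    logger.report('daily summary')     # written to myapp.report.log
    logger.close()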
Code example #10
File: compiler.py  Project: hjobrien/PUF
def _main():
    indentation = [""]
    prevRelLine = [None]  # Previous relevant line
    startOfBlock = False
    line_no = 1

    writer = Writer(outFileName)
    jOut = open(jsonFileName, "w")
    jOut.write("[")

    for line in open(inFileName, "r"):
        if not len(line.strip()):
            continue
        line = processComments(line)
        if line_no != 1:
            jOut.write(",")
        assert whitespaceValid(
            line, indentation,
            startOfBlock), "Error: Invalid whitespace on line: %i" % line_no
        if startOfBlock:
            indentation.append(getLeadingWhitespace(line))
            prevRelLine.append(None)
        ws = getLeadingWhitespace(line)
        while indentation and ws != indentation[-1]:
            indentation.pop()
            temp = prevRelLine.pop()
            assert temp is None or temp[0] not in [
                "do"
            ], "Error, failed to properly close \"%s\" on line %i" % (temp[0],
                                                                      temp[1])
        if prevRelLine[-1] is not None and prevRelLine[-1][0] == "do":
            writer.formatLine(line.replace("times", "").strip())
        elif prevRelLine[-1] is not None and prevRelLine[-1][0] == "python":
            writer.inline = False
        try:
            writer.convert(line)
        except Exception:
            # print("Error parsing line: %i" % line_no)
            pass
        if not writer.inline:
            jsonObj = toJson(line)
            jsonObj.id = line_no
            jsonObj.prev = line_no - 1  # hacked together: prev is just the previous line number
            jsonObj.indent = len(ws)
            jOut.write(str(jsonObj))

        if prevRelLine[-1] is not None:
            prevRelLine.pop()
        try:
            command = getCommand(line)
        except Exception:
            # print("Error parsing line: %i" % line_no)
            pass
        if command == "python":
            writer.inline = True
        # print(prevRelLine[-2] if len(prevRelLine) > 1 else "Top")
        startOfBlock = startsBlock(command)
        if startOfBlock:
            prevRelLine.append((command, line_no))
        line_no += 1

    writer.printLines()
    writer.close()
    jOut.write("]")
    jOut.close()
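The block handling hinges on a stack of leading-whitespace strings: when a line's indentation no longer matches the top of the stack, levels are popped until it does, and each pop closes one open block. The mechanism in isolation:

def dedent_blocks(ws, indentation):
    # Pop indentation levels until the current line's leading whitespace
    # matches the top of the stack; each pop closes one open block.
    closed = 0
    while indentation and ws != indentation[-1]:
        indentation.pop()
        closed += 1
    return closed

stack = ["", "    ", "        "]
print(dedent_blocks("", stack))  # -> 2 (two nested blocks closed)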