예제 #1
0
def get_price():
    """Read up to three price strings from the product detail page.

    Returns a 3-tuple ``(price, price2, price3)``; empty strings stand in
    for price tiers that are not present on the page.
    """
    first, second, third = "", "", ""
    try:
        # Member-only pricing hides the actual numbers entirely.
        member_tip = poco(
            "com.alibaba.wireless:id/price_private_tip_container")
        if member_tip.exists():
            logger.warning('Member price only.')
            return "", "", ""
        # Single price-range widget: only one value to report.
        range_obj = poco("com.alibaba.wireless:id/current_range")
        if range_obj.exists():
            return range_obj.get_text(), "", ""
        # Tiered pricing: up to three separate text views.
        tier1 = poco("com.alibaba.wireless:id/textView1")
        if tier1.exists():
            tier2 = poco("com.alibaba.wireless:id/textView2")
            tier3 = poco("com.alibaba.wireless:id/textView3")
            first = tier1.get_text()
            if tier2.exists():
                second = tier2.get_text()
            if tier3.exists():
                third = tier3.get_text()
    except Exception as e:
        capture_error(e)
        print("get_price")
    logger.info("已读取价格信息。")
    return first, second, third
예제 #2
0
 def wrapper(*args, **kw):
     """Call the wrapped function and log how long the page read took."""
     started = time.time()
     result = func(*args, **kw)
     logger.info("页面读取数据耗时 = %s" % format_time(time.time() - started))
     return result
예제 #3
0
def get_newest_info(secret_key):
    """Scan the JWC notice list and push matching notices to the user.

    Fetches the list page, walks every table row, and for each notice
    whose title starts with the practice-section prefix and is dated
    2018-11-09 with an ``.htm`` link, downloads the detail text and
    submits it via ``submit_info`` using *secret_key*.
    """
    # timeout so a dead server can't hang the scan forever
    html = requests.get('http://jwc.njupt.edu.cn/1594/list.htm',
                        headers=headers, timeout=10)
    # requests guesses ISO-8859-1 for this server; the page is UTF-8.
    html.encoding = 'utf-8'
    data = etree.HTML(html.text)
    contentList = data.xpath('//*[@id="newslist"]/div/div/table/tr')
    for content in contentList:
        titles = content.xpath('td/table/tr/td[1]/a/text()')
        hrefs = content.xpath('td/table/tr/td[1]/a/@href')
        stamps = content.xpath('td/table/tr/td[2]/div/text()')
        # Skip separator/malformed rows instead of crashing on [0].
        if not (titles and hrefs and stamps):
            continue
        title, href, submittime = titles[0], hrefs[0], stamps[0]
        if title.startswith('【实践科】') and submittime == "2018-11-09":
            if href.endswith('.htm'):
                detail = getDetailpage(href)
                data_info = {'text': title, 'desp': detail}
                submit_info(secret_key, data_info)
                logger.info('完成发送')
예제 #4
0
def get_share_and_snap(title, table):
    """Grab the share passphrase and (optionally) a screenshot for a product.

    Parameters:
        title: product title, used to check whether a snapshot already exists.
        table: database table name forwarded to ``snap_exists``.

    Returns:
        (share_text, snap_filename) — ``share_text`` is '' on failure and
        ``snap_filename`` is None when no new screenshot was taken.
    """
    share_text = ''
    snap_filename = None
    try:
        # Tap the share button on the detail page.
        share_btn = poco("com.alibaba.wireless:id/iv_detail_shared")
        share_btn.wait_for_appearance(5)
        share_btn.click()

        # The "copy passphrase" button only works once the QR code has
        # rendered, so wait for the QR image nodes first.
        QR_obj = \
            poco("android:id/content").child("android.widget.FrameLayout").offspring(
                "android.webkit.WebView").child(
                "android.view.View").child("android.view.View")[0].child("android.view.View").offspring(
                type="android.widget.Image")
        poco.wait_for_all(list(QR_obj), timeout=20)
        sleep(0.5)

        # Screenshot: only products not yet in the database get a fresh
        # snapshot; the new filename is passed back to the caller.
        if not snap_exists(title, table):
            snap_filename = get_goods_snapshot(SNAP_PATH)
        else:
            snap_filename = None

        # Tap the "copy passphrase" button inside the share sheet.
        copy_btn = poco("android:id/content").child(
            "android.widget.FrameLayout").offspring(
                "com.alibaba.wireless:id/dynamic_share_channel_layout"
            ).offspring("com.alibaba.wireless:id/dynamic_share_recycler_view"
                        ).child("android.widget.LinearLayout")[0].child(
                            "android.widget.LinearLayout").child(
                                name="com.alibaba.wireless:id/item_name")
        copy_btn.wait_for_appearance()
        copy_btn.click()

        # Read the copied share passphrase from the clipboard via adb.
        output = exec_cmd(adb_get_clipboard)
        # NOTE(review): "parse_outpost" looks like a typo of parse_output — confirm
        share_text = parse_outpost(output)
        logger.info("读取分享口令")
    except Exception as e:
        capture_error(e)
        print("get_share_text")

    return share_text, snap_filename
예제 #5
0
def get_logistics():
    """Read the shipping city and price from the product detail page.

    Returns:
        (logistics_city, logistics_price) — falls back to ("无", 0) when a
        widget is missing, and also after an unexpected error, so callers
        that tuple-unpack the result never receive ``None``.

    Fixes a bug where the exception path fell off the end of the function
    and implicitly returned ``None``, making the caller's
    ``city, price = get_logistics()`` unpack raise ``TypeError``.
    """
    logistics_city = "无"
    logistics_price = 0
    try:
        city_obj = poco(
            "com.alibaba.wireless:id/qx_logistics_city_txt")
        price_obj = poco(
            "com.alibaba.wireless:id/qx_logistics_price_txt")
        if city_obj.exists():
            logistics_city = city_obj.get_text()
        if price_obj.exists():
            logistics_price = price_obj.get_text()
        logger.info("已读取物流信息。")
    except Exception as e:
        capture_error(e)
        print("get_logistics")
    return logistics_city, logistics_price
def submit(request):
	"""Handle a student's answer submission for a fill-in-the-blank problem.

	Requires a signed-in student session.  For each blank, the submitted
	token is spliced into a copy of the problem's raw token stream and the
	assembled program is written to data\\submissions\\<sub>-<pro>-<n>.cpp;
	a Submission row is then stored and the user is redirected to the
	status list.  Unauthenticated users get the upload page instead.
	"""
	if 'checkSignin' in request.session and request.session['checkSignin']:
		email = request.session['email']
		# nickname = request.session['nickname']
		# One POST value per blank, kept in POST-key order.
		submission = [request.POST.get(key) for key in request.POST.keys()]
		# SECURITY FIX: proId comes straight from the query string — parse it
		# as an int instead of eval()-ing arbitrary user-supplied code.
		proId = int(request.GET.get('proId'))
		jsonDataPath = 'data\\jsonData\\%s.json' % str(proId)
		with open(jsonDataPath) as f:
			X0 = (json.load(f))[0]
		raw_tokens = [item[0] for item in X0]
		problemInfo = Problem.objects.values('answer', 'blanks').filter(id=proId)[0]
		# answer_lst = json.loads(problemInfo['answer'])
		# TODO(tdye): not used answer_lst
		blanks_lst = json.loads(problemInfo['blanks'])
		submissionId = Submission.get_next_submission_id()
		logger.info(submission)
		for i in range(len(blanks_lst)):
			# Copy the pristine token stream and drop this answer into its blank.
			temp = raw_tokens[:]
			submissionPath = 'data\\submissions\\%d-%d-%d.cpp' % (submissionId, proId, i + 1)
			temp[blanks_lst[i]] = submission[i]
			# join() instead of repeated += (quadratic string concatenation).
			assembleContent = ''.join(item + ' ' for item in temp)
			with open(submissionPath, 'w') as f:
				logger.info('\n' + clean_c_style(assembleContent) + '\n')
				f.write(clean_c_style(assembleContent))
			logger.info(raw_tokens)
			logger.info(temp)
		try:
			dbSubmission = Submission(submissionId=submissionId, judgeStatus=-2, proId=proId, email=email,
			                          answer=json.dumps(submission))
			dbSubmission.save()
		except IOError as e:
			logger.error(e)
			logger.error('IO Error occurs while saving a new Submission %d' % submissionId)
		finally:
			# NOTE: returning from finally swallows any exception raised above;
			# kept for backward compatibility with the original behavior.
			return redirect('/getStatusList?volume=1')
	else:
		# request.session['errmsg'] = 'Please Login First'
		return render(request, 'upload.html', {})
예제 #7
0
def transform(vector: list, id: int, difficulty):
    """Turn a token vector into a (program, problem, blanks, answers) tuple.

    If the model marked no blanks at all, roughly ``len(vector) // 10``
    tokens in the middle half of the vector are blanked at random before
    assembling again, so every problem ends up with some blanks.

    Parameters:
        vector: token vector; each entry's index 3 holds the blank tag
            ('B' marks a blank).  (Annotation fixed: the original used the
            list literal ``[]`` as the hint, which is a list *instance*,
            not a type.)
        id: problem id, used only for logging.
        difficulty: forwarded unchanged to ``assemble``.

    Returns:
        (program, problem, blanks_lst, answer_lst), with program and
        problem both run through ``clean_c_style``.
    """
    program, problem, blanks_lst, answer_lst = assemble(vector,
                                                        difficulty=difficulty)
    if len(blanks_lst) == 0:
        logger.info('Model generates ZERO blank for problem %d!' % id)
        # Randomly blank ~10% of the tokens, restricted to the middle half
        # of the program.  NOTE(review): randint may hit the same index
        # twice, so the final blank count can be below len(vector) // 10.
        for _ in range(len(vector) // 10):
            randId = random.randint(int(len(vector) * 1 / 4),
                                    int(len(vector) * 3 / 4))
            vector[randId][3] = 'B'
        program, problem, blanks_lst, answer_lst = assemble(
            vector, difficulty=difficulty)
        logger.info('System generates %d blanks randomly for problem %d.' %
                    (len(blanks_lst), id))
    else:
        logger.info('Model generates %d blanks for problem %d.' %
                    (len(blanks_lst), id))
    program = clean_c_style(program)
    problem = clean_c_style(problem)
    return program, problem, blanks_lst, answer_lst
def teacher(request):
	"""
	Teacher-facing view: index page, signin, logout and problem upload.

	GET without ``action``: render the teacher index with this teacher's own
	problem repository (when signed in) plus a per-theme count of all problems.
	GET with ``action=logout``: clear the teacher session and redirect to ``next``.
	POST with ``action=signin``: verify credentials and populate the session.
	POST with ``action=upload``: accept a problem file and test cases, generate
	blanks, write everything to disk and store a Problem row.
	:param request: Django HttpRequest
	:return: HttpResponse (render or redirect); ``False`` on a duplicate
	         test-case directory during upload
	"""
	if request.method == 'GET':
		if request.GET.get('action') is None:
			# Teacher index: collect this teacher's repository if signed in.
			repository = []
			if 'checkTeacherSignin' in request.session and request.session['checkTeacherSignin']:
				email = request.session['teacherEmail']
				for item in Teacher.get_my_repository(email=email, volume=1):
					# Stringify numeric fields for template rendering.
					item['averageScore'] = str(item['averageScore'])
					item['score'] = str("%.2f" % item['score'])
					repository.append(item)
			# Count problems per theme across the whole problem DB.
			problemDB = Problem.objects.values('theme')
			problemDBInfo = {}
			from Model.themes import themes
			for problem in problemDB:
				# 'theme' is a comma-separated list of theme keys.
				themeList = (problem['theme']).split(',')
				for theme in themeList:
					theme = themes[theme]
					if theme in problemDBInfo.keys():
						problemDBInfo[theme] += 1
					else:
						problemDBInfo[theme] = 1
			return render(request, 'teacher_index.html', {'repository': repository,
			                                              'problemDBInfo': problemDBInfo})
		elif request.GET.get('action') == 'logout':
			# Clear the teacher session and bounce back to the caller's page.
			nextURL = request.GET.get('next')
			request.session['checkTeacherSignin'] = False
			request.session['teacherEmail'] = ''
			request.session['teacherNickname'] = ''
			return redirect(nextURL)
	elif request.method == 'POST':
		if request.GET.get('action') == 'signin':
			email = request.POST.get('email')
			password = request.POST.get('password')
			exist = Teacher.objects.filter(email=email).exists()
			nextURL = request.GET.get('next')
			if exist:
				teacher = Teacher.objects.all().filter(email=email)[0]
				if check_password(password, teacher.password):
					# Credentials OK: populate the teacher session and cache
					# the repository listing as JSON.
					request.session['checkTeacherSignin'] = True
					request.session['teacherEmail'] = teacher.email
					request.session['teacherNickname'] = teacher.nickname
					repository = []
					for item in Teacher.get_my_repository(email=email, volume=1):
						item['averageScore'] = str(item['averageScore'])
						item['score'] = str("%.2f" % item['score'])
						repository.append(item)
					request.session['repository'] = json.dumps(repository)
					return redirect(nextURL)
				else:
					# The password is wrong.
					return redirect(nextURL)
			else:
				# The account does not exist.
				return redirect(nextURL)
		elif request.GET.get('action') == 'upload':
			# Problem upload: form fields plus the author from the session.
			title, themes, description, score, author = \
				request.POST.get('title'), \
				request.POST.get('themes'), \
				request.POST.get('description'), \
				int(request.POST.get('score')), \
				request.session['teacherEmail']
			themes_ = themes.split(',')
			id = Problem.get_next_problem_id()
			origin_filename = "%s-origin%s" % (str(id), '.cpp')
			problem_filename = "%s%s" % (str(id), '.cpp')
			jsonData_filename = "%s%s" % (str(id), '.json')
			problem_file_obj = request.FILES.get('problem-file')
			origin_file_path = os.path.join('data\\problem', origin_filename)
			problem_file_path = os.path.join('data\\problem', problem_filename)

			# Persist the uploaded source file to disk chunk by chunk.
			f = open(origin_file_path, mode="wb")
			for i in problem_file_obj.chunks():  # TODO(tdye): using coroutine?
				f.write(i)
			f.close()
			# Vectorize the program and keep the vector as JSON for later use.
			X = program2vector.transform(origin_file_path)
			jsonDataPath = os.path.join('data\\jsonData', jsonData_filename)
			with open(jsonDataPath, mode='w', encoding='utf-8') as f:
				json.dump(X, f)
			# Append one marker row per theme before prediction.
			for theme_ in themes_:
				X[0].append(["", "", int(theme_), "O"])
			X0 = predict.predict(X, themes_)[0]
			difficulty = int(request.POST.get('difficulty'))
			# Turn the predicted vector back into program/problem text.
			program, problem, blanks_lst, answer_lst = vector2program.transform(X0, id, difficulty=difficulty)
			logger.info('\nOrigin file %d:\n%s' % (id, program))
			logger.info('\nProblem file %d:\n%s' % (id, problem))
			logger.info('\nBlanks %d:\n%s' % (id, blanks_lst))
			logger.info('\nAnswer %d:\n%s' % (id, answer_lst))
			with open(problem_file_path, mode='w') as f:
				f.write(problem)
			# The origin file is no longer needed once the problem is written.
			os.remove(origin_file_path)
			# Unpack the uploaded test cases into data/test_cases/<id>/.
			test_cases_obj = request.FILES.get('test-cases')
			if not os.path.exists("%s%s" % ('data/test_cases/', str(id))):
				os.mkdir("%s%s" % ('data/test_cases/', str(id)))
			else:
				print('directory already exists')
				logger.error('Directory -%s%s already exists' % ('data/test_cases/', str(id)))
				return False
			test_cases_path_rar = os.path.join('data/test_cases', str(id), str(id) + '.zip')
			test_cases_path = os.path.join('data/test_cases', str(id))
			f = open(test_cases_path_rar, mode="wb")
			for i in test_cases_obj.chunks():  # TODO(tdye): using coroutine?
				f.write(i)
			f.close()
			unzip_file(test_cases_path_rar, test_cases_path)
			os.remove(test_cases_path_rar)
			# update database

			try:
				db_problem = Problem(id=id, title=title, theme=themes, description=description, author=author,
				                     score=score, answer=json.dumps(answer_lst), blanks=json.dumps(blanks_lst))
				db_problem.save()
			except ValueError:
				print("Invalid parameters => (%d, %s, %s, %s, %s, ) while saving a problem!" % (
				id, title, themes, description, author))
				logger.error("Invalid parameters => (%d, %s, %s, %s, %s, ) while saving a problem!" % (
				id, title, themes, description, author))
			finally:
				# NOTE(review): return inside finally swallows any exception
				# raised in the try block above — confirm that is intended.
				request.session['program'] = program
				request.session['problem-id'] = id
				request.session['problem'] = problem
				request.session['answer'] = answer_lst
				return redirect('/generation')
예제 #9
0
    def crawl_pages(self):
        """Walk every result page: refresh, hover, parse, then persist."""
        page_parser = HtmlParser(self.driver)  # page-parsing helper
        saver = Saver()  # persistence helper

        loop_count = 0
        while True:
            loop_count += 1  # iteration counter, used only for debugging
            logger.info("Preparing to parse page ({0}/{1})".format(
                self.current_page, self.page_counts))

            # Probe whether the page actually rendered its data; reload if not.
            self.refresh()

            # Scroll to the bottom so lazy content loads.
            logger.info("  Refresh current page.")
            self.goto_page_bottom()

            # Hover over each product to reveal its price data.
            logger.info("  Hover current page.")
            self.hover_all(0.5)

            # Extract the records from the rendered page.
            logger.info("  Parse current page.")
            page_records = page_parser.get_page_data()

            logger.info("Saving to the database.")
            saver.to_db(page_records)

            # Stop after one round in debug mode, or once the final page
            # has been processed.
            if (DEBUG and loop_count >= 1) or \
                    self.current_page == self.page_counts:
                break

            logger.info("Scroll to the next page.")
            self.scroll_page()

            logger.info("Wait and delay.")
            sleep(5)
예제 #10
0
def get_detail_data():
    """Scrape one product detail page end-to-end and persist the record.

    Reads the title, prices, logistics info, share passphrase/screenshot,
    trade records and seller info, merges them into a single dict and
    saves it via ``save_crawler``.  Errors are captured and logged rather
    than propagated.
    """
    # crawler_record = init_crawler_record()
    crawler_record = {}  # dict holding every field scraped for this product
    trade_data = []
    seller_info = ()

    try:
        product_object = poco("com.alibaba.wireless:id/tv_detail_subject")
        product = product_object.get_text()
        product = product.strip(' ')
        logger.info("扫描商品 = {}".format(product))

        price1, price2, price3 = get_price()
        logistics_city, logistics_price = get_logistics()

        # Fetch the share passphrase and (for new items) a screenshot.
        share_text, snap_filename = get_share_and_snap(product, TABLE)
        crawler_record['share_text'] = share_text
        if snap_filename is not None:
            crawler_record['snapshot'] = snap_filename

        check_page_no = 0  # current page index within the detail view
        trade_info_checked = False  # whether trade info has been scanned
        seller_info_checked = False  # whether seller info has been scanned
        while check_page_no <= 2:  # scroll at most 3 pages looking for keywords
            headers = find_key_info()
            if not trade_info_checked:
                if headers[TRADE_INFO]:  # present but maybe only partly visible
                    name, pos, key_obj = headers[TRADE_INFO]
                    if not object_in_view(TRADE_INFO, pos):
                        print("找到部分交易数据,翻动到顶部")
                        scroll_to_top(pos, top=0.2)
                        sleep(0.5)  # pause after scrolling, otherwise taps miss
                    logger.info("读取交易信息")
                    # Tap the detail button and read the full trade records.
                    trade_data = get_trade_info()
                    trade_info_checked = True
            headers = find_key_info()
            if not seller_info_checked:
                if headers[SELLER_INFO]:  # truthy when the widget exists
                    name, pos, key_obj = headers[SELLER_INFO]
                    if not object_in_view(SELLER_INFO, pos):
                        scroll_to_top(pos, top=0.3)
                    logger.info("读取厂家信息")
                    seller_info = get_seller_info()
                    seller_info_checked = True
            # Both sections found — stop scanning.
            if trade_info_checked and seller_info_checked:
                break
            # Seller found before trade info means there is no trade info;
            # stop scanning early.
            if (trade_info_checked is False) and (seller_info_checked is True):
                break
            check_page_no += 1
            scroll_detail_page()  # scroll one full page

        # Assemble the scraped record.
        crawler_record['title'] = product
        crawler_record['share_text'] = share_text
        # crawler_record['snapshot'] = snap_filename
        crawler_record['price1'] = price1
        crawler_record['price2'] = price2
        crawler_record['price3'] = price3
        crawler_record['logistics_city'] = logistics_city
        crawler_record['logistics_price'] = logistics_price
        # Store the trade records as trade1, trade2, ...
        for i, trade in enumerate(trade_data):
            trade_keyword = 'trade' + str(i + 1)
            crawler_record[trade_keyword] = trade
        # Unpack the seller-info tuple into its ten named fields.
        # NOTE(review): if seller info was never found, seller_info is still
        # () and this unpack raises, landing in the except below so nothing
        # gets saved — confirm that is the intended behavior.
        crawler_record['company'], crawler_record['years'], crawler_record['back_rate'], crawler_record['buyer'], \
        crawler_record['desc'], crawler_record['respo'], crawler_record['delivery'], crawler_record['sign_desc'], \
        crawler_record['sign_respo'], crawler_record['sign_delivery'] = seller_info
        # if crawler_record['desc'] is None:
        #     crawler_record['desc'] = ''
        # if crawler_record['respo'] is None:
        #     crawler_record['respo'] = ''
        # if crawler_record['delivery'] is None:
        #     crawler_record['delivery'] = ''
        save_crawler(crawler_record, TABLE)
    except Exception as e:
        capture_error(e)
        print("get_detail_data")