import json
import logging
import os
import shutil
from math import ceil

import semantic_version
from bs4 import BeautifulSoup
from mongoengine import DoesNotExist
from solidity_parser import parser


def fetch_repo_list():
    # GitHub API request to fetch all repos matching keyword 'smart contract'
    # with stars > 9, 100 results per page
    url = ('https://api.github.com/search/repositories'
           '?q=smart%20contract+stars:%3E9&per_page=100')
    response = make_request(url, API_HEADER)
    json_response = json.loads(response.content)
    total_repo_count = int(json_response['total_count'])
    pages = ceil(total_repo_count / 100)

    # process the response from the first page
    repo_details(json_response)

    # process the responses from the remaining pages, following the
    # pagination URLs that `requests` parses out of the Link header
    while pages > 1:
        try:
            nextpage_url = response.links['next']['url']
        except Exception as e:
            logging.exception(e)
            pages -= 1
            continue
        response = make_request(nextpage_url, API_HEADER)
        json_response = json.loads(response.content)
        repo_details(json_response)
        pages -= 1
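# The functions in this listing rely on a shared `make_request` helper and on
# the `API_HEADER`, `HEADER` and `GITHUB` constants, none of which are shown
# (`repo_details`, which consumes each page of search results, is also not
# reconstructed here). The sketch below is a minimal plausible implementation,
# assuming the helper wraps `requests.get` with a single back-off retry on
# rate limiting; the token placeholder and the 60-second delay are
# illustrative, not from the source.
import time

import requests

GITHUB = 'https://github.com/'
API_HEADER = {'Accept': 'application/vnd.github.v3+json',
              'Authorization': 'token <GITHUB_TOKEN>'}  # placeholder token
HEADER = {'User-Agent': 'Mozilla/5.0'}


def make_request(url, headers):
    """GET `url` with `headers`, retrying once after a pause if rate-limited."""
    response = requests.get(url, headers=headers)
    if response.status_code in (403, 429):  # likely rate-limited
        time.sleep(60)
        response = requests.get(url, headers=headers)
    return response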
def get_contract_details_from_etherscan(deployment_address):
    """Extract the details of a deployed contract from Etherscan
    using its deployment address.

    :param deployment_address: deployment address of the contract
    :return: DeploymentAddressDetails object containing deployment address information
    """
    try:
        return DeploymentAddressDetails.objects(
            deployment_address=deployment_address).get()
    except DoesNotExist:
        # If no details exist for this deployment address,
        # fetch them from Etherscan and store them in the local database.
        url = f'https://etherscan.io/address/{deployment_address}#code'
        response = make_request(url, HEADER)
        parsed = BeautifulSoup(response.content, 'html.parser')
        # The contract-info panel lists name, compiler version and
        # optimization settings as alternating label/value lines.
        _, contract_name, _, compiler_version, _, optimized, _, _ = [
            text for text in parsed.find(
                'div', class_='mx-gutters-lg-1').text.split('\n') if text
        ]
        # e.g. 'v0.5.16+commit.9c3226ce' -> '0.5.16'
        compiler_version = compiler_version[1:].split('+')[0]
        # e.g. 'Yes with 200 runs' -> optimized='Yes', runs='200'
        optimized, _, runs, _ = optimized.split()
        optimized = optimized == 'Yes'
        verified_bytecode = parsed.find('div', id='verifiedbytecode2').text
        verified_bytecode = trim_bytecode(verified_bytecode, compiler_version)
        obj = DeploymentAddressDetails(deployment_address=deployment_address,
                                       contract_name=contract_name,
                                       compiler_version=compiler_version,
                                       optimized=optimized,
                                       optimized_runs=int(runs),
                                       blockchain_bytecode=verified_bytecode)
        obj.save()
        return obj
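# `DeploymentAddressDetails` is the local cache backing the lookup above. Its
# definition is not part of this listing; the sketch below infers the schema
# from the constructor call, and the MongoEngine field types are assumptions.
# `trim_bytecode` is likewise an undefined helper and is not reconstructed.
from mongoengine import BooleanField, Document, IntField, StringField


class DeploymentAddressDetails(Document):
    """Verified-contract metadata scraped from Etherscan, keyed by address."""
    deployment_address = StringField(required=True, unique=True)
    contract_name = StringField()
    compiler_version = StringField()
    optimized = BooleanField()
    optimized_runs = IntField()
    blockchain_bytecode = StringField()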
def check_solidity_version(url):
    """Check whether a repository is a fix repository by checking whether it
    contains at least one file with Solidity version >= 0.4.19.

    :param url: URL of the repository
    :return: (True, number of .sol files) if the repository qualifies,
             otherwise (False, 0)
    """
    # directory into which repositories are cloned
    path = os.getcwd() + '/data'
    if not os.path.exists(path):
        os.mkdir(path)

    # check whether the repo contains any .sol files at all
    response = make_request(url + '/search?q=extension%3Asol', HEADER)
    parsed = BeautifulSoup(response.content, 'html.parser')
    try:
        parsed.find('div', class_='code-list').find('a')['href']
    except Exception as e:
        logging.exception(e)
        logging.info('Does not contain .sol files')
        return False, 0

    if not clone_repo(url, path):
        return False, 0

    repo_path = path + '/' + url.split('/')[-1]
    sol_files = get_sol_files(repo_path)
    for sol_file in sol_files:
        try:
            parsed = parser.parse_file(sol_file)
            ver = None
            for child in parsed['children']:
                if child['type'] == 'PragmaDirective':
                    ver = child['value']
                    break
            if not ver:
                logging.error('File version not found in file ' + str(sol_file))
                continue
            # normalise the pragma, e.g. '^0.4.19' or '>=0.4.19 <0.6.0'
            ver = ver.replace('^', '')
            if '<' in ver:
                ver = ver.split('<')[0].strip()
            file_sol_ver = semantic_version.SimpleSpec(ver)
            # checking whether the file's version is >= 0.4.19
            req_sol_ver = semantic_version.SimpleSpec('>=0.4.19')
            if req_sol_ver.match(file_sol_ver.clause.target):
                shutil.rmtree(repo_path)
                return True, len(sol_files)
        except Exception as e:
            logging.exception(e)
            continue

    # delete the cloned copy of the repo
    shutil.rmtree(repo_path)
    return False, 0
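# `clone_repo` and `get_sol_files` are helpers the listing does not define.
# Minimal sketches under assumed behaviour: `clone_repo` clones into `path`
# and reports success, `get_sol_files` walks the clone collecting .sol file
# paths. The `--depth 1` flag is an assumption made for speed.
import subprocess


def clone_repo(url, path):
    """Clone `url` into `path`; return True on success."""
    result = subprocess.run(['git', 'clone', '--depth', '1', url],
                            cwd=path, capture_output=True)
    return result.returncode == 0


def get_sol_files(repo_path):
    """Return the paths of all .sol files under `repo_path`."""
    sol_files = []
    for root, _, files in os.walk(repo_path):
        sol_files.extend(os.path.join(root, name)
                         for name in files if name.endswith('.sol'))
    return sol_files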
def fetch_watchers(url):
    """Extract the number of watchers of a repository.

    :param url: GitHub URL of the repository
    :return: Number of watchers
    """
    response = make_request(url, HEADER)
    parsed = BeautifulSoup(response.content, 'html.parser')
    watcher_count = parsed.find('a', class_='social-count').text
    watcher_count = watcher_count.split('\n')[1].strip()
    return watcher_count
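# Quick usage sketch (the repository URL is illustrative only). The helper
# returns the count as page text rather than an int, so callers that need a
# number should convert it.
watchers = fetch_watchers('https://github.com/OpenZeppelin/openzeppelin-contracts')
print(watchers)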
def get_prid_mergestatus(full_name, commit_hash):
    """Check whether a commit has been merged. If it has, also return its PR ID.

    :param full_name: Full name of the repository (e.g. 'USERNAME/REPONAME')
    :param commit_hash: commit hash
    :return: (PRID, merged status)
    """
    try:
        response = make_request(
            f'{GITHUB}{full_name}/branch_commits/{commit_hash}', HEADER)
        parsed = BeautifulSoup(response.content, 'html.parser')
        # the branch_commits page lists the pull request that merged the commit
        prid = parsed.find('li', class_='pull-request')
        prid = prid.text.replace('(', '').replace(')', '').replace('#', '')
        return (prid, 'True')
    except Exception as e:
        logging.exception(e)
        return ('null', 'False')
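# Illustrative call (repository name and hash are placeholders): a merged
# commit yields its PR number, an unmerged or unknown one yields
# ('null', 'False'). Both elements are strings, mirroring the return style.
prid, merged = get_prid_mergestatus('USERNAME/REPONAME', 'COMMIT_HASH')
print(prid, merged)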
# Inside the main loop over repository full names (row = 'USERNAME/REPONAME'):

        # print('Processing ' + row)
        repo_name = row.split('/')[1]

        # skip blacklisted repos
        if row in BLACKLIST_REPOS:
            continue

        # store issue data in a subdirectory named USERNAME__REPONAME
        folder_name = row.replace('/', '__')
        if not os.path.exists(folder_name):
            os.mkdir(folder_name)
        os.chdir(folder_name)

        # get the total number of result pages
        response = make_request(GITHUB + row + '/issues?q=is%3Aissue', HEADER)
        parsed = BeautifulSoup(response.content, 'html.parser')
        pages = get_total_pages(parsed)

        # for each page, get all issues on the current page
        for page in range(1, pages + 1):
            response = make_request(
                GITHUB + row + '/issues?q=is%3Aissue&page=' + str(page), HEADER)
            parsed = BeautifulSoup(response.content, 'html.parser')
            divs = parsed.find_all('div', class_='Box-row--focus-gray')
            for div in divs:
                issue_link = div.find('a', class_='link-gray-dark').attrs['href']
                response = make_request(GITHUB + issue_link[1:], HEADER)
                # save the html response of the issue to reponame_issueID.html
                issue_id = issue_link.split('/')[-1]
                with open(repo_name + '_' + issue_id + '.html', 'wb') as f:
                    f.write(response.content)
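# `get_total_pages` is another undefined helper. A minimal sketch, assuming
# GitHub's issue list exposes its page count in a pagination widget; the
# 'pagination' class name and the position of the last page link are
# assumptions about the markup of the time.
def get_total_pages(parsed):
    """Return the number of result pages shown in the pagination widget."""
    pagination = parsed.find('div', class_='pagination')
    if not pagination:
        return 1  # a single page of results has no pagination widget
    # the last link is 'Next'; the one before it carries the final page number
    return int(pagination.find_all('a')[-2].text)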