from django.shortcuts import render
from seoanalyzer import analyze


def seo(request):
    # analyze() also accepts a sitemap URL as a second argument, e.g.
    # analyze('https://seositecheckup.com/', 'https://www.marketgoal.com/sitemap_index.xml')
    output = analyze('http://www.feedbackinfra.com/contact.php')
    custom_list = []
    # Flatten every list-valued section of the report (pages, errors, ...)
    # into a single list for the template.
    for key, section in output.items():
        if isinstance(section, list):
            for item in section:
                custom_list.append(item)
    return render(request, 'about/seo.html', {'seoData': custom_list})
from seoanalyzer import analyze


def test_print_output():
    output = analyze('https://www.sethserver.com/tests/utf8.html')
    assert len(output['pages']) == 1
    assert output['pages'][0]['url'] == 'https://www.sethserver.com/tests/utf8.html'
    assert output['pages'][0]['title'] == 'unicode chara¢ters'
    assert output['pages'][0]['description'] == ''
    assert output['pages'][0]['word_count'] == 493
    assert output['errors'] == []
    assert output['duplicate_pages'] == []
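# For reference, a hypothetical sketch of the dict analyze() returns, using
# only the keys exercised by the assertions above. Real reports carry more
# fields, so treat this shape as an assumption, not the library's full API;
# the list-valued sections ('pages', 'errors', 'duplicate_pages') are what
# the views in this file flatten.
example_output = {
    'pages': [
        {
            'url': 'https://www.sethserver.com/tests/utf8.html',
            'title': 'unicode chara¢ters',
            'description': '',
            'word_count': 493,
        },
    ],
    'errors': [],
    'duplicate_pages': [],
}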
import json
import urllib.request

import whois
import xmltodict
from django.shortcuts import render
from seoanalyzer import analyze


def index(request):
    domain = whois.whois('marketgoal.com')
    xml = urllib.request.urlopen(
        'http://data.alexa.com/data?cli=11&dat=s&url={}'.format(
            'www.marketgoal.com')).read()
    result = xmltodict.parse(xml)
    # xmltodict prefixes XML attributes with '@'; strip the prefix so the
    # template can index the keys directly.
    data = json.dumps(result).replace('@', '')
    data_tojson = json.loads(data)

    # Earlier experiments, kept for reference:
    # html = """
    # <div>hello</div>
    # <a href="/index.html">Not this one</a>
    # <a href="http://google.com">Link 1</a>
    # <a href="http:/amazon.com">Link 2</a>
    # """
    # soup = BeautifulSoup(html)
    # http_links = soup.findAll('a', href=re.compile(r"^http"))
    # results = [a for a in http_links if a['href'].find('google') == -1]
    # print(results)
    #
    # obj = CallAwis('AKIAJA5H7O5EY24LS5HQ', '3NO5p4ebYtkwPaDavcLpgNUd+pwItyTYhlrvlHgd')
    # allalexaData = obj.urlinfo('http://www.pageturnpro.com/')
    # allalexaData = json.loads(json.dumps(xmltodict.parse(str(allalexaData))))
    # alexaRand = {}
    # for key, value in allalexaData['aws:UrlInfoResponse']['aws:Response']['aws:UrlInfoResult']['aws:Alexa'].items():
    #     if key == 'aws:TrafficData':
    #         alexaRand['Rank'] = value['aws:Rank']
    #         alexaRand['RankUs'] = value['aws:RankByCountry']['aws:Country']

    output = analyze('https://www.foodcourier.com',
                     'https://www.foodcourier.com/sitemap.xml')
    page_errors = []
    # Flatten every list-valued section of the report into one list.
    for key, section in output.items():
        if isinstance(section, list):
            for item in section:
                page_errors.append(item)
    return render(request, 'product/index.html', {
        'domains': domain,
        'alexa': data_tojson['ALEXA']['SD'][1],
        'pageError': page_errors,
    })
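# A minimal, hypothetical illustration of the '@' stripping above: xmltodict
# maps XML attributes to '@'-prefixed keys and repeated elements to lists,
# which is how data_tojson['ALEXA']['SD'][1] reaches the second <SD>
# element's attributes. The sample XML below is made up for the demo.
import json
import xmltodict

sample = '<ALEXA VER="0.9"><SD TITLE="A"/><SD><POPULARITY TEXT="12345"/></SD></ALEXA>'
parsed = xmltodict.parse(sample)
print(parsed['ALEXA']['SD'][1])   # second <SD>; attribute key is '@TEXT'
cleaned = json.loads(json.dumps(parsed).replace('@', ''))
print(cleaned['ALEXA']['SD'][1])  # {'POPULARITY': {'TEXT': '12345'}}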
import argparse
import inspect
import json
import os

from seoanalyzer import analyze


def main(args=None):
    # Computed unconditionally so the html branch works even when args is
    # passed in directly.
    module_path = os.path.dirname(inspect.getfile(analyze))

    if not args:
        arg_parser = argparse.ArgumentParser()
        arg_parser.add_argument(
            'site', help='URL of the site you are wanting to analyze.')
        arg_parser.add_argument(
            '-s', '--sitemap',
            help='URL of the sitemap to seed the crawler with.')
        arg_parser.add_argument(
            '-f', '--output-format', help='Output format.',
            choices=['json', 'html'], default='json')
        args = arg_parser.parse_args()

    output = analyze(args.site, args.sitemap)

    if args.output_format == 'html':
        from jinja2 import Environment
        from jinja2 import FileSystemLoader

        env = Environment(
            loader=FileSystemLoader(os.path.join(module_path, 'templates')))
        template = env.get_template('index.html')
        output_from_parsed_template = template.render(result=output)
        print(output_from_parsed_template)
    elif args.output_format == 'json':
        print(json.dumps(output, indent=4, separators=(',', ': ')))
    else:
        exit(1)
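# Hypothetical direct invocation of main() above: argparse reads sys.argv,
# so patching it mirrors a shell call such as
#   python analyzer.py https://example.com -f json
# (the script name and URL are placeholders).
import sys

sys.argv = ['analyzer', 'https://example.com', '-f', 'json']
main()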
import csv
import json
from datetime import date

from seoanalyzer import analyze

# SEO API request / JSON round-trip formatting
output = analyze('https://santander.com.br/',
                 'https://www.santander.com.br/sitemap.xml')
output_to_json = json.dumps(output)
formatted_output = json.loads(output_to_json)
indice_csv = formatted_output['pages']

# Open the CSV file for appending
myFile = open('base.csv', 'a', newline='')

# Create the CSV writer
csvwriter = csv.writer(myFile)

# Walk the JSON, cleaning up the raw data as we go
count = 0
for num, out in enumerate(indice_csv):
    indice_csv[num]['Data'] = date.today()
    indice_csv[num]['url'] = indice_csv[num]['url'].replace(
        'http://www.', 'https://')
    if count == 0:
        header = out.keys()
        # csvwriter.writerow(header)
    count += 1
    csvwriter.writerow(out.values())

myFile.close()
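# A hedged alternative to the hand-rolled header logic above: csv.DictWriter
# keeps columns aligned with keys even if the page dicts gain fields. The
# field names are taken from the test assertions elsewhere in this file, so
# treat them as an assumption about the analyzer's output.
import csv
from datetime import date


def write_pages_csv(pages, path='base.csv'):
    fieldnames = ['Data', 'url', 'title', 'description', 'word_count']
    with open(path, 'a', newline='') as fh:
        writer = csv.DictWriter(fh, fieldnames=fieldnames,
                                extrasaction='ignore')
        if fh.tell() == 0:  # write the header only for a fresh file
            writer.writeheader()
        for page in pages:
            row = dict(page)
            row['Data'] = date.today().isoformat()
            writer.writerow(row)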
#! /usr/bin/env python3
# https://www.searchenginejournal.com/seo-tasks-automate-with-python/351050/
# https://github.com/sethblack/python-seo-analyzer/
from seoanalyzer import analyze

# General form: output = analyze(site, sitemap)
siteA = "https://www.google.com/"
analysisA = analyze(siteA)
print(analysisA)
from seoanalyzer import analyze

output = analyze("https://google.co.in")
print(output)
# Variant of main() that adds a -d/--disk flag for saving the HTML report to
# a file instead of printing it. The surrounding lines are restored from the
# full main() earlier in this file.
def main(args=None):
    module_path = os.path.dirname(inspect.getfile(analyze))

    if not args:
        arg_parser = argparse.ArgumentParser()
        arg_parser.add_argument(
            'site', help='URL of the site you are wanting to analyze.')
        arg_parser.add_argument(
            '-s', '--sitemap',
            help='URL of the sitemap to seed the crawler with.')
        arg_parser.add_argument(
            '-f', '--output-format', help='Output format.',
            choices=['json', 'html'], default='json')
        arg_parser.add_argument(
            '-d', '--disk', help='save to disk',
            choices=['y', 'n'], default='y')
        args = arg_parser.parse_args()

    output = analyze(args.site, args.sitemap)

    if args.output_format == 'html':
        from jinja2 import Environment
        from jinja2 import FileSystemLoader

        env = Environment(
            loader=FileSystemLoader(os.path.join(module_path, 'templates')))
        template = env.get_template('index.html')
        output_from_parsed_template = template.render(result=output)
        if args.disk == 'y':
            with open("test.html", "w", encoding='utf-8') as text_file:
                text_file.write(output_from_parsed_template)
        else:
            print(output_from_parsed_template)
    elif args.output_format == 'json':
        print(json.dumps(output, indent=4, separators=(',', ': ')))
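# Example shell usage of the variant above (the script name is hypothetical):
#   python analyzer.py https://example.com -f html -d y   # writes test.html
#   python analyzer.py https://example.com -f html -d n   # prints the HTML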
from seoanalyzer import analyze


def test_print_output():
    output = analyze('https://www.sethserver.com/tests/utf8.html')
    # Fail deliberately so the test runner's failure report dumps `output`.
    assert False
from seoanalyzer import analyze


def seo_analyser(url, sitemap):
    output = analyze(url, sitemap)
    print(output)
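# Example call of the wrapper above; both URLs are placeholders.
seo_analyser('https://example.com', 'https://example.com/sitemap.xml')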