import json

# NOTE: `es` (scraper helpers: get_tree, get_names, get_author_name_urls,
# get_coauthors_dict) and `gfd` (provides GraphMaker) are project-local
# modules; their import lines are not shown in the original source.


def graphs_from_files():
    # Schools whose co-author data has already been scraped to disk.
    filenames = [
        "Adam Smith Business School",
        "Dental School",
        "School of Chemistry",
        "School of Critical Studies",
        "School of Culture and Creative Arts",
        "School of Education",
    ]

    # Parse the university's schools index page and pick out links to the
    # individual school pages using an EXSLT regex inside the XPath query.
    schools_tree = es.get_tree("http://www.gla.ac.uk/schools/")
    ns = 'http://exslt.org/regular-expressions'
    path = '//div[@class="row standardContent"]//a[re:match(@href, "schools/[A-Za-z]+/")]'
    a_elems = schools_tree.xpath(path, namespaces={'re': ns})

    # Build (school name, staff page URL) pairs from the anchor elements.
    base_url = "http://www.gla.ac.uk"
    urls = []
    names = []
    for a in a_elems:
        staff_page_url = base_url + a.get("href") + "staff/"
        urls.append(staff_page_url)
        school_name = a.text
        names.append(school_name)
    school_names_urls = list(zip(names, urls))
    print(school_names_urls)

    # For each school with saved co-author data: load the data, scrape the
    # current staff names, and write out a co-authorship graph.
    for name, url in school_names_urls:
        if name in filenames:
            with open("../coauthor_data/" + name + ".txt") as f:
                d = json.load(f)
            staff_names = es.get_names(url)
            gm = gfd.GraphMaker(d, staff_names)
            gm.write_to_file(name + " graph")
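# Both functions in this file repeat the same index-page scrape. A minimal
# sketch of a shared helper they could call instead -- assuming es.get_tree()
# returns an lxml tree, as the xpath() calls suggest; the helper name itself
# is hypothetical, not part of the original code:

def get_school_names_urls():
    """Return (school name, staff page URL) pairs from the schools index."""
    schools_tree = es.get_tree("http://www.gla.ac.uk/schools/")
    ns = 'http://exslt.org/regular-expressions'
    path = '//div[@class="row standardContent"]//a[re:match(@href, "schools/[A-Za-z]+/")]'
    a_elems = schools_tree.xpath(path, namespaces={'re': ns})
    base_url = "http://www.gla.ac.uk"
    return [(a.text, base_url + a.get("href") + "staff/") for a in a_elems]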
def get_and_graph():
    # Same school-index scrape as graphs_from_files() (see the shared helper
    # sketched above).
    schools_tree = es.get_tree("http://www.gla.ac.uk/schools/")
    ns = 'http://exslt.org/regular-expressions'
    path = '//div[@class="row standardContent"]//a[re:match(@href, "schools/[A-Za-z]+/")]'
    a_elems = schools_tree.xpath(path, namespaces={'re': ns})

    base_url = "http://www.gla.ac.uk"
    urls = []
    names = []
    for a in a_elems:
        staff_page_url = base_url + a.get("href") + "staff/"
        urls.append(staff_page_url)
        school_name = a.text
        names.append(school_name)
    school_names_urls = list(zip(names, urls))
    print(school_names_urls)

    # Remove SoCS as it's done already, and Physics for now because it's huge.
    # Iterate over a copy so removing items doesn't skip elements.
    for tup in school_names_urls[:]:
        if "Physics" in tup[0]:
            school_names_urls.remove(tup)

    # For each remaining school (the [10:] slice presumably skips schools
    # handled in an earlier run):
    for name, url in school_names_urls[10:]:
        print(name, url)
        if "Humanities" in name:
            name = "School of Humanities"
        author_name_urls = es.get_author_name_urls(name, url)
        # Write these to file for safekeeping -- already being done by es:
        # with open("../nameurls/" + name + ".txt", 'w') as f:
        #     json.dump(author_name_urls, f)
        coauthor_dict = es.get_coauthors_dict(author_name_urls, name)

        # Extract just the names from the name/URL pairs, if ever needed:
        # author_names = [author_name for author_name, author_url in author_name_urls]

        # Reorder each paper's author names from "Last, First" to
        # "Title First Name Last Name" order.
        for paper_id, data in coauthor_dict.items():
            authors = data["authors"]
            newauthors = [(anu[0].split(", ")[1] + " " + anu[0].split(", ")[0], anu[1])
                          for anu in authors]
            coauthor_dict[paper_id]["authors"] = newauthors

        # Do the same for author_name_urls.
        # TODO: is this necessary? We're checking against URLs, so gm could
        # even be given just the URLs.
        author_name_urls = [(anu[0].split(", ")[1] + " " + anu[0].split(", ")[0], anu[1])
                            for anu in author_name_urls]

        # Now build the graph, compute metrics, and write it out.
        gm = gfd.GraphMaker()
        gm.populate_graph(coauthor_dict, author_name_urls)
        gm.add_metrics()
        gm.add_just_school_community()
        gm.write_to_file("../newestgraphs/" + name + ".json")
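# A minimal usage sketch, assuming this module is run as a script and that
# the es/gfd helper modules are importable. Which function to call depends
# on whether co-author data has already been saved to disk:

if __name__ == "__main__":
    # graphs_from_files()  # rebuild graphs from saved co-author data
    get_and_graph()        # scrape from scratch and build graphs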