def project(request, abstractid):
    LogRequest(request, "project: " + str(abstractid))
    try:
        project = NSFProject.objects.get(awardID=abstractid)
    except NSFProject.DoesNotExist:
        msg = "No project found for award ID: " + str(abstractid)
        LogWarning(msg)
        return render_to_response('error.html',
                                  {'message': msg,
                                   'menu': generate_error_menu(request)},
                                  context_instance=RequestContext(request))
    pis = project.getPIs()
    collabs = uniq([c.project2 for c in CollabProjects.objects.filter(project1=project)])
    collabpis = uniq([(p.investigator, collab)
                      for collab in collabs
                      for p in ProjectPIs.objects.filter(project=collab)])
    collabpis.sort(key=lambda r: r[0].lastname)
    amount = format_amount(project.amount)
    return render_to_response('project.html',
                              {'project': project,
                               'menu': generate_menu(request),
                               'amount': amount,
                               'pis': pis,
                               'collabs': collabs,
                               'collabpis': collabpis},
                              context_instance=RequestContext(request))

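# Every snippet in this collection imports a `uniq` helper from its project's own
# utils module; the helper itself is not shown here. The following is a minimal
# sketch of the behavior these call sites appear to assume (duplicates dropped,
# first-seen order preserved); it is an illustrative assumption, not any of the
# projects' actual implementations.
def uniq(items):
    """Return the unique items of an iterable, preserving first-seen order."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
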
def write_mutatee_cmakelists(directory, info, platform):
    cmakelists = open(directory + "/cmake-mutatees.txt", "w")
    compilers = info['compilers']
    mutatees = info['mutatees']
    comps = utils.uniq(map(lambda m: m['compiler'], mutatees))
    pname = os.environ.get('PLATFORM')
    modules = utils.uniq(map(lambda t: t['module'], info['tests']))
    print_src_lists(mutatees, platform, info, directory)
    print_compiler_cmakefiles(mutatees, platform, info, cmakelists, directory)

def home(request):
    if not _is_logged_in(request):
        return redirect(users.create_login_url('/'))
    sites = all_sites()
    logins = sorted(uniq([site.login for site in sites if site.login]))
    passwords = sorted(uniq([site.password for site in sites if site.password]))
    return render(request, 'passwords.html', {
        'sites': sites,
        'sites_json': json.dumps(_sites_dict(sites)),
        'logins_json': json.dumps(logins),
        'passwords_json': json.dumps(passwords),
        'logout_url': users.create_logout_url('/'),
    })

def _refresh_sites(request):
    sites = all_sites()
    sites_html = render(request, '_sites.html', {'sites': sites}).content
    logins = sorted(uniq([site.login for site in sites if site.login]))
    passwords = sorted(uniq([site.password for site in sites if site.password]))
    return json.dumps({
        'sites_html': sites_html,
        'sites': _sites_dict(sites),
        'logins': logins,
        'passwords': passwords,
    })

def gooddsspfilename(filename, dsspdirList=None):
    # first check if the argument is really a file
    if os.path.exists(filename):
        return filename
    # extract pdbid from the filename
    if dsspdirList is None:
        dsspdirList = _DSSP_DIR
    if type(dsspdirList) == str:
        dsspdirList = dsspdirList.split(os.pathsep)
    pdbid = pdbidFromFilename(filename)
    pdbidl = pdbid.lower()
    branch = pdbidl[1:3]
    # generate filename variants
    basenames = [x % vars() for x in
                 ("%(filename)s", "%(pdbid)s", "%(pdbidl)s", "pdb%(pdbidl)s")]
    basenames = uniq(basenames)
    extensions = ("", ".dssp", ".DSSP")
    compressions = ("", ".gz", ".Z")
    # generate subdirectory locations
    subdirs = []
    for dsspdir in dsspdirList:
        innerSubdirs = [x % vars() for x in
                        ("",
                         "%(branch)s",
                         "%(dsspdir)s",
                         os.path.join("%(dsspdir)s", "%(branch)s"),
                         os.path.join("%(dsspdir)s", "divided", "%(branch)s"),
                         os.path.join("%(dsspdir)s", "data", "structures",
                                      "divided", "pdb", "%(branch)s"))]
        subdirs.extend(innerSubdirs)
    subdirs = uniq(subdirs)
    # search tree
    for subdir in subdirs:
        for cmp in compressions:
            for base in basenames:
                for ext in extensions:
                    filename = os.path.join(subdir, "%(base)s%(ext)s%(cmp)s" % vars())
                    if os.path.exists(filename):
                        return filename
    return None

def generateCollaborators(institution):
    pis = NSFInvestigator.objects.filter(satc=True).extra(
        select={'lower_name': 'lower(lastname)'}).order_by(
        'lower_name', 'firstname').filter(attendee=True)
    for pi in pis:
        line = pi.email + ", "
        piprojects = ProjectPIs.objects.filter(investigator=pi)
        projects = sorted(uniq([p.project for p in piprojects if p.project]),
                          key=lambda proj: proj.startDate)
        collaborators = sorted(uniq(
            [p.investigator for proj in projects
             for p in ProjectPIs.objects.filter(project=proj)] +
            [p.investigator for proj in projects
             for collab in [c.project2 for c in CollabProjects.objects.filter(project1=proj)]
             for p in ProjectPIs.objects.filter(project=collab)]),
            key=lambda pi: pi.lastname)
        if institution:
            institutions = pi.getInstitutions()
            for institution in institutions:
                pis = uniq([ppi.investigator for ppi in
                            ProjectPIs.objects.filter(institution=institution)])
                pis = [icollab for icollab in pis if pi.attendee]
                if pi in pis:
                    pis.remove(pi)
                else:
                    pass  # LogWarning("Not a self institution collaborator! " + pi.fullDisplay())
                collaborators += pis
        if pi in collaborators:
            collaborators.remove(pi)
        else:
            pass  # print "Not self-collaborator: " + pi.email
        line += ', '.join([collaborator.email for collaborator in collaborators
                           if collaborator.attendee])
        print line

def get_all_mutatee_sources(groupable, module, info):
    return utils.uniq(reduce(lambda a, b: set(a) | set(b),
                             (map(lambda m: m['preprocessed_sources'],
                                  filter(lambda m: m['name'] != 'none'
                                         and is_valid_test(m) == 'true'
                                         and is_groupable(m, info) == groupable
                                         and get_module(m) == module,
                                         info['mutatees']))),
                             []))

def institutions(request, max_page=MAX_PAGE):
    title = 'Institutions'
    projects, explanation = NSFProject.selectProjectsFromRequest(request)
    institutions = uniq([project.institution for project in projects])
    institutions.sort(key=lambda inst: inst.name)
    explanation = str(len(institutions)) + " institutions hosting " + explanation
    if max_page < len(institutions) < 1.85 * max_page:
        max_page = 2 * max_page  # don't paginate
    paginator = Paginator(institutions, max_page)
    page = request.GET.get('page')
    if page == 'all':
        showinstitutions = institutions
    else:
        try:
            showinstitutions = paginator.page(page)
        except PageNotAnInteger:
            showinstitutions = paginator.page(1)
        except EmptyPage:
            showinstitutions = paginator.page(paginator.num_pages)
    return render_to_response('institutions.html',
                              {'title': title,
                               'menu': generate_menu(request),
                               'explanation': explanation,
                               'paginate': len(institutions) > len(showinstitutions),
                               'institutions': showinstitutions},
                              context_instance=RequestContext(request))

def generate_institution_graph(projects):
    edges = []
    institutions = []
    for project in projects:
        cols = []
        institution = project.institution
        if institution not in institutions:
            institutions.append(institution)
        collabs = uniq([c.project2 for c in CollabProjects.objects.filter(project1=project)])
        for collab in collabs:
            collabinst = collab.institution
            if collabinst not in institutions:
                institutions.append(collabinst)
            cols.append(institutions.index(collabinst))
        val = 1
        val += int(project.amount / 1000000)
        if project.satc:
            val *= 2
        edges += make_edges(cols, val)
    edges = merge_edges(edges)
    json_obj = {'nodes': [], 'links': []}
    for institution in institutions:
        desc = {'name': institution.showName(),
                'group': 1,
                'instid': institution.id}
        json_obj['nodes'].append(desc)
    for edge in edges:
        desc = {"source": edge[0], "target": edge[1], "value": edge[2]}
        json_obj['links'].append(desc)
    json_str = json.dumps(json_obj)
    return json_str

def group_textlines(self, laparams, lines):
    plane = Plane(self.bbox)
    plane.extend(lines)
    boxes = {}
    for line in lines:
        neighbors = line.find_neighbors(plane, laparams.line_margin)
        if line not in neighbors:
            continue
        members = []
        for obj1 in neighbors:
            members.append(obj1)
            if obj1 in boxes:
                members.extend(boxes.pop(obj1))
        if isinstance(line, LTTextLineHorizontal):
            box = LTTextBoxHorizontal()
        else:
            box = LTTextBoxVertical()
        for obj in uniq(members):
            box.add(obj)
            boxes[obj] = box
    done = set()
    for line in lines:
        if line not in boxes:
            continue
        box = boxes[line]
        if box in done:
            continue
        done.add(box)
        if not box.is_empty():
            yield box
    return

def get_textboxes(self, laparams, lines):
    plane = Plane(lines)
    boxes = {}
    for line in lines:
        neighbors = line.find_neighbors(plane, laparams.line_margin)
        assert line in neighbors, line
        members = []
        for obj1 in neighbors:
            members.append(obj1)
            if obj1 in boxes:
                members.extend(boxes.pop(obj1))
        if isinstance(line, LTTextLineHorizontal):
            box = LTTextBoxHorizontal()
        else:
            box = LTTextBoxVertical()
        for obj in uniq(members):
            box.add(obj)
            boxes[obj] = box
    done = set()
    for line in lines:
        box = boxes[line]
        if box in done:
            continue
        done.add(box)
        if not box.is_empty():
            yield box
    return

def nomination_ids_for(congress, options={}):
    nomination_ids = []

    page = page_for(congress, options)
    if not page:
        logging.error("Couldn't download page for %d congress" % congress)
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    raw_nomination_ids = doc.xpath('//div[@id="content"]/p[2]/a/text()')

    nomination_ids = []
    for raw_id in raw_nomination_ids:
        pieces = raw_id.split(' ')

        # ignore these
        if raw_id in ["PDF", "Text", "split into two or more parts"]:
            pass
        elif len(pieces) < 2:
            logging.error("Bad nomination ID detected: %s" % raw_id)
            return None
        else:
            nomination_ids.append(pieces[1])

    return utils.uniq(nomination_ids)

def get_textboxes(self, laparams, lines):
    plane = Plane(lines)
    boxes = {}
    for line in lines:
        neighbors = line.find_neighbors(plane, laparams.line_margin)
        assert line in neighbors, line
        members = []
        for obj1 in neighbors:
            members.append(obj1)
            if obj1 in boxes:
                members.extend(boxes.pop(obj1))
        if isinstance(line, LTTextLineHorizontal):
            box = LTTextBoxHorizontal()
        else:
            box = LTTextBoxVertical()
        for obj in uniq(members):
            box.add(obj)
            boxes[obj] = box
    done = set()
    for line in lines:
        box = boxes[line]
        if box in done:
            continue
        done.add(box)
        yield box.analyze(laparams)
    return

def edit_statements_page(title, csv_file, username, summary=None):
    """
    Edit a wiki page of suggested statements.
    """
    rows = csv.reader(csv_file)
    (header, property) = next(rows)
    assert header == "qid"
    statements = []
    for (qid, value) in rows:
        statements.append((qid, property, value))
    statements.sort()
    statements = list(uniq(statements))
    lines = []
    for (entity, property, value) in statements:
        lines.append("* {{Statement|" + entity + "|" + property + "|" + str(value) + "}}")
    if statements:
        lines.append("")
        lines.append(wikitext.link("Add via QuickStatements",
                                   quickstatements_url(statements)))
        lines.append("")
    text = "\n".join(lines)
    return page.edit(title, text, username, summary)

def index(options={}):
    concordance = defaultdict(list)
    files = [x for x in os.listdir(os.getcwd() + "/data/")
             if re.sub("\d+\.json", "", x) == ""]
    if options.get('limit', False):
        files = files[:options.get('limit')]

    for file in files:
        sermon = json.load(open(os.getcwd() + "/data/" + file, 'r'))
        words = uniq(re.findall(r"\b[A-z]+\b",
                                sermon['opening'].replace('\n', ' ').lower()))
        '''
        if options.get("uniques", False):
            words = uniq(re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower()))
        else:
            words = re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower())
        '''
        for word in words:
            if len(word) > 2:
                concordance[word].append(file.replace('.json', ''))

    write(json.dumps(concordance, sort_keys=True, indent=2),
          os.getcwd() + "/src/data/index.json")
    write(json.dumps(concordance, sort_keys=True),
          os.getcwd() + "/src/data/index.min.json")

def getCollaborators(projects):
    collaborators = sorted(uniq(
        [p.investigator for proj in projects
         for p in ProjectPIs.objects.filter(project=proj)] +
        [p.investigator for proj in projects
         for collab in [c.project2 for c in CollabProjects.objects.filter(project1=proj)]
         for p in ProjectPIs.objects.filter(project=collab)]),
        key=lambda pi: pi.lastname)
    return collaborators

def bill_ids_for(congress, options):
    bill_ids = []

    bill_type = options.get('bill_type', None)
    if bill_type:
        bill_types = [bill_type]
    else:
        bill_types = utils.thomas_types.keys()

    for bill_type in bill_types:
        # match only links to landing pages of this bill type
        # it shouldn't catch stray links outside of the confines of the 100 on the page,
        # but if it does, no big deal
        link_pattern = "^\s*%s\d+\s*$" % utils.thomas_types[bill_type][1]

        # loop through pages and collect the links on each page until
        # we hit a page with < 100 results, or no results
        offset = 0
        while True:
            # download page, find the matching links
            page = utils.download(
                page_for(congress, bill_type, offset),
                page_cache_for(congress, bill_type, offset),
                options.get('force', False))
            if not page:
                logging.error("Couldn't download page with offset %i, aborting" % offset)
                return None

            # extract matching links
            doc = html.document_fromstring(page)
            links = doc.xpath(
                "//a[re:match(text(), '%s')]" % link_pattern,
                namespaces={"re": "http://exslt.org/regular-expressions"})

            # extract the bill ID from each link
            for link in links:
                code = link.text.lower().replace(".", "").replace(" ", "")
                bill_ids.append("%s-%s" % (code, congress))

            if len(links) < 100:
                break

            offset += 100

            # sanity check, while True loops are dangerous
            if offset > 100000:
                break

    return utils.uniq(bill_ids)

def mat_h2_plus_old(bond_length, bspline_set, l_list):
    """ gives hamiltonian matrix and overlap matrix of hydrogen molecule ion

    Parameters
    ----------
    bond_length : Double
        bond length of hydrogen molecular ion
    bspline_set : BSplineSet
    l_list : list of non negative integer
        list of angular quantum numbers to use

    Returns
    -------
    h_mat : numpy.ndarray
        hamiltonian matrix
    s_mat : numpy.ndarray
        overlap matrix
    """

    # compute r1 matrix (B_i|O|B_j)
    # (-1/2 d^2/dr^2, 1, 1/r^2, {s^L/g^{L+1} | L<-l_list})
    rs = bspline_set.xs
    d2_rmat = bspline_set.d2_mat()
    r2_rmat = bspline_set.v_mat(1.0/(rs*rs))
    s_rmat = bspline_set.s_mat()

    tmp_L_list = uniq(flatten([ls_non_zero_YYY(L1, L2)
                               for L1 in l_list
                               for L2 in l_list]))
    en_r1mat_L = {}
    for L in tmp_L_list:
        en_r1mat_L[L] = bspline_set.en_mat(L, bond_length/2.0)

    # compute r1Y matrix (B_iY_L1M1|O|B_jY_L2M2)
    def one_block(L1, L2):
        v = -2.0*sum([sqrt(4.0*pi/(2*L+1)) *
                      y1mat_Yqk((L1, 0), (L, 0), (L2, 0)) *
                      en_r1mat_L[L]
                      for L in ls_non_zero_YYY(L1, L2)])
        if L1 == L2:
            L = L1
            t = -0.5 * d2_rmat + L*(L+1)*0.5*r2_rmat
            return t+v
        else:
            return v

    H_mat = bmat([[one_block(L1, L2) for L1 in l_list] for L2 in l_list])
    S_mat = bmat([[s_rmat if L1 == L2 else None for L1 in l_list]
                  for L2 in l_list])
    return (H_mat, S_mat)

def getScopFamily(self, id, chainid=None, resnum=None):
    if len(id) == 4:
        families = []
        for entry in self.entriesByPdbid.get(id, []):
            if chainid is not None:
                # check chainid
                pass
            if resnum is not None:
                # check resnum
                pass
            families.append(entry.scopfamily)
        families = sorted(uniq(families))
        return ';'.join(families)
    return ''

def nomination_ids_for(congress, options={}):
    nomination_ids = []

    page = page_for(congress)
    if not page:
        logging.error("Couldn't download page for %d congress" % congress)
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    nomination_ids = doc.xpath('//div[@id="content"]/p[2]/a/text()')
    nomination_ids = [x.split(' ')[1] for x in nomination_ids]

    return utils.uniq(nomination_ids)

def vote_ids_for_house(congress, session_year, options):
    vote_ids = []

    index_page = "http://clerk.house.gov/evs/%s/index.asp" % session_year
    group_page = r"ROLL_(\d+)\.asp"
    link_pattern = r"http://clerk.house.gov/cgi-bin/vote.asp\?year=%s&rollnumber=(\d+)" % session_year

    # download index page, find the matching links to the paged listing of votes
    page = utils.download(
        index_page,
        "%s/votes/%s/pages/house.html" % (congress, session_year),
        options)
    if not page:
        logging.error("Couldn't download House vote index page, skipping")
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    links = doc.xpath(
        "//a[re:match(@href, '%s')]" % group_page,
        namespaces={"re": "http://exslt.org/regular-expressions"})

    for link in links:
        # get some identifier for this inside page for caching
        grp = re.match(group_page, link.get("href")).group(1)

        # download inside page, find the matching links
        page = utils.download(
            urllib.parse.urljoin(index_page, link.get("href")),
            "%s/votes/%s/pages/house_%s.html" % (congress, session_year, grp),
            options)
        if not page:
            logging.error("Couldn't download House vote group page (%s), aborting" % grp)
            continue

        doc = html.document_fromstring(page)
        votelinks = doc.xpath(
            "//a[re:match(@href, '%s')]" % link_pattern,
            namespaces={"re": "http://exslt.org/regular-expressions"})

        for votelink in votelinks:
            num = re.match(link_pattern, votelink.get("href")).group(1)
            vote_id = "h" + num + "-" + str(congress) + "." + session_year
            if not should_process(vote_id, options):
                continue
            vote_ids.append(vote_id)

    return utils.uniq(vote_ids)

def mat_h2_plus(bond_length, bspline_set, l_list):
    """ gives hamiltonian matrix and overlap matrix of hydrogen molecule ion

    Parameters
    ----------
    bond_length : Double
        bond length of hydrogen molecular ion
    bspline_set : BSplineSet
    l_list : list of non negative integer
        list of angular quantum numbers to use

    Returns
    -------
    h_mat : numpy.ndarray
        hamiltonian matrix
    s_mat : numpy.ndarray
        overlap matrix
    """

    # compute r1 matrix (B_i|O|B_j)
    # (-1/2 d^2/dr^2, 1, 1/r^2, {s^L/g^{L+1} | L<-l_list})
    rs = bspline_set.xs
    d2_rmat = bspline_set.d2_mat()
    r2_rmat = bspline_set.v_mat(1.0/(rs*rs))
    s_rmat = bspline_set.s_mat()

    tmp_L_list = uniq(flatten([ls_non_zero_YYY(L1, L2)
                               for L1 in l_list
                               for L2 in l_list]))
    en_r1mat_L = {}
    for L in tmp_L_list:
        en_r1mat_L[L] = bspline_set.en_mat(L, bond_length/2.0)

    # compute y1 matrix (Y_L1|P_L(w_A)|Y_L2)
    en_y1mat_L = {}
    for L in tmp_L_list:
        en_y1mat_L[L] = coo_matrix(
            [[np.sqrt(4.0*np.pi/(2*L+1)) *
              y1mat_Yqk((L1, 0), (L, 0), (L2, 0))
              for L1 in l_list]
             for L2 in l_list])

    LL_y1mat = coo_matrix(np.diag([1.0*L*(L+1) for L in l_list]))
    diag_y1mat = coo_matrix(np.diag([1 for L in l_list]))

    # compute r1y1 matrix
    h_r1y1mat = (synthesis_mat(-0.5*d2_rmat, diag_y1mat) +
                 synthesis_mat(+0.5*r2_rmat, LL_y1mat) -
                 2.0 * sum([synthesis_mat(en_r1mat_L[L], en_y1mat_L[L])
                            for L in tmp_L_list]))
    s_r1y1mat = synthesis_mat(s_rmat, diag_y1mat)
    return (h_r1y1mat, s_r1y1mat)

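# Both mat_h2_plus variants above also rely on a `flatten` helper alongside `uniq`
# to collapse the nested per-(L1, L2) lists of angular momenta into a single list
# before de-duplication. A minimal sketch, under the assumption that it flattens
# exactly one level of nesting:
def flatten(list_of_lists):
    """Concatenate a list of lists into one flat list."""
    return [item for sublist in list_of_lists for item in sublist]
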
def displayPI(request, pi):
    piprojects = ProjectPIs.objects.filter(investigator=pi)
    projects = sorted(uniq([p.project for p in piprojects if p.project]),
                      key=lambda proj: proj.startDate)
    totalawarded = format_amount(sum([project.amount for project in projects]))
    collaborators = getCollaborators(projects)
    try:
        collaborators.remove(pi)
    except ValueError:
        LogWarning("Not a self-collaborator: " + pi.fullDisplay())
    institutions = pi.getInstitutions()
    return render_to_response('pi.html',
                              {'pi': pi,
                               'menu': generate_menu(request),
                               'totalawarded': totalawarded,
                               'institutions': institutions,
                               'projects': projects,
                               'collaborators': collaborators},
                              context_instance=RequestContext(request))

def bill_ids_for(congress, options, bill_states={}):
    # override if we're actually using this method to get amendments
    doing_amendments = options.get('amendments', False)

    bill_ids = []

    bill_type = options.get('amendment_type' if doing_amendments else 'bill_type', None)
    if bill_type:
        bill_types = [bill_type]
    else:
        bill_types = utils.thomas_types.keys()

    for bill_type in bill_types:
        # This sub is re-used for pulling amendment IDs too.
        if (bill_type in ('samdt', 'hamdt', 'supamdt')) != doing_amendments:
            continue

        # match only links to landing pages of this bill type
        # it shouldn't catch stray links outside of the confines of the 100 on the page,
        # but if it does, no big deal
        link_pattern = "^\s*%s\d+\s*$" % utils.thomas_types[bill_type][1]

        # loop through pages and collect the links on each page until
        # we hit a page with < 100 results, or no results
        offset = 0
        while True:
            # download page, find the matching links
            page = utils.download(
                page_for(congress, bill_type, offset),
                page_cache_for(congress, bill_type, offset),
                options)
            if not page:
                logging.error("Couldn't download page with offset %i, aborting" % offset)
                return None

            # extract matching links
            doc = html.document_fromstring(page)
            links = doc.xpath(
                "//a[re:match(text(), '%s')]" % link_pattern,
                namespaces={"re": "http://exslt.org/regular-expressions"})

            # extract the bill ID from each link
            for link in links:
                code = link.text.lower().replace(".", "").replace(" ", "")
                bill_id = "%s-%s" % (code, congress)

                if options.get("fast", False):
                    fast_cache_path = utils.cache_dir() + "/" + bill_info.bill_cache_for(bill_id, "search_result.html")
                    old_state = utils.read(fast_cache_path)

                    # Compare all of the output in the search result's <p> tag, which
                    # has last major action, number of cosponsors, etc. to a cache on
                    # disk to see if any major information about the bill changed.
                    parent_node = link.getparent()  # the <p> tag containing the whole search hit
                    parent_node.remove(parent_node.xpath("b")[0])  # remove the <b>###.</b> node that isn't relevant for comparison
                    new_state = etree.tostring(parent_node)  # serialize this tag

                    if old_state == new_state:
                        logging.info("No change in search result listing: %s" % bill_id)
                        continue

                    bill_states[bill_id] = new_state

                bill_ids.append(bill_id)

            if len(links) < 100:
                break

            offset += 100

            # sanity check, while True loops are dangerous
            if offset > 100000:
                break

    return utils.uniq(bill_ids)

def print_mutators_list(out, mutator_dict, test_dict, info, platform):
    LibSuffix = platform['filename_conventions']['library_suffix']
    ObjSuffix = platform['filename_conventions']['object_suffix']

    out.write("######################################################################\n")
    out.write("# A list of all the mutators to be compiled\n")
    out.write("######################################################################\n\n")

    module_list = []
    for t in test_dict:
        module_list.append(t['module'])
    module_set = set(module_list)

    for m in module_set:
        out.write("\n")
        out.write("include_directories (\"..src/%s\")\n" % m)
        out.write("set (%s_MUTATORS\n" % (m))
        module_tests = filter(lambda t: m == t['module'], test_dict)
        module_mutators = map(lambda t: t['mutator'], module_tests)
        for t in utils.uniq(module_mutators):
            out.write("\t%s\n" % (t))
        out.write(")\n\n")
        out.write("set (%s_OBJS_ALL_MUTATORS\n" % (m))
        for t in utils.uniq(module_mutators):
            out.write("\t%s%s\n" % (t, ObjSuffix))
        out.write(")\n\n")

    # We're doing this cmake list style, so we need multiple iterations
    # since cmake doesn't support structs

    # Iteration 1: print the list of libraries
    out.write("set (MUTATOR_NAME_LIST\n")
    for m in mutator_dict:
        out.write("\t%s\n" % m['name'])
    out.write("\t)\n\n")

    # Iteration 2: The appropriate module library for each mutator
    out.write("set (MUTATOR_MODULE_LIB_LIST\n")
    for m in mutator_dict:
        # Module info is stored with the "test" dictionary, not the
        # "mutator" dictionary
        tests = filter(lambda t: t['mutator'] == m['name'], test_dict)
        modules = map(lambda t: t['module'], tests)
        if (len(utils.uniq(modules)) != 1):
            print "ERROR: multiple modules for test " + m['name']
            raise
        module = modules.pop()
        out.write("\ttest%s\n" % module)
        # Keep this so we can provide source directories
        m['module'] = module
    out.write("\t)\n\n")

    # Iteration 3: print the list of sources for these libraries. Sources
    # must be singular (so, really, 'source')
    out.write("set (SRC src)\n")
    out.write("set (MUTATOR_SOURCE_LIST\n")
    for m in mutator_dict:
        if (len(m['sources']) != 1):
            print "ERROR: multiple sources for test " + m['name']
            raise
        out.write("\t${SRC}/%s/%s\n" % (m['module'], m['sources'][0]))
    out.write("\t)\n\n")

    # Now, iterate over these lists in parallel with a CMake foreach
    # statement to build the add_library directive
    out.write("foreach (val RANGE %d)\n" % (len(mutator_dict) - 1))
    out.write("\tlist (GET MUTATOR_NAME_LIST ${val} lib)\n")
    out.write("\tlist (GET MUTATOR_SOURCE_LIST ${val} source)\n")
    out.write("\tlist (GET MUTATOR_MODULE_LIB_LIST ${val} comp_dep)\n")
    out.write("\tset(SKIP FALSE)\n")
    out.write("\tforeach (dep ${comp_dep})\n")
    out.write("\t\tif(NOT TARGET ${dep})\n")
    out.write("\t\t\tset(SKIP TRUE)\n")
    out.write("\t\tendif()\n")
    out.write("\tendforeach()\n")
    out.write("\tif(NOT SKIP)\n")
    out.write("\t\tadd_library (${lib} ${source})\n")
    out.write("\t\ttarget_link_libraries (${lib} ${comp_dep} ${LIBTESTSUITE})\n")
    out.write("\t\tinstall (TARGETS ${lib} \n")
    out.write("\t\t RUNTIME DESTINATION ${INSTALL_DIR}\n")
    out.write("\t\t LIBRARY DESTINATION ${INSTALL_DIR})\n")
    out.write("\tendif()\n")
    out.write("endforeach()\n\n")

def project_pis(projects):
    return sorted(uniq([ppi.investigator for ppi in
                        itertools.chain.from_iterable(
                            [project.getPIs() for project in projects])]),
                  key=lambda r: r.lastname + ' ' + r.firstname)

import os
import time
from glob import iglob
import mailbox

# procmail-py - Email content and spam filtering
# MIT License
# © 2012 Noah K. Tilton <*****@*****.**>

from config import BASE_MAILDIR, MY_DOMAINS, addresses, mark_read
from spam import spamc, blacklisted
from utils import mv, spammy_spamc, mark_as_read, uniq

INBOXDIR = os.path.join(BASE_MAILDIR, "INBOX")

maildirs_on_disk = [os.path.basename(dir)
                    for dir in iglob(os.path.join(BASE_MAILDIR, "*"))]
maildirs_in_file = addresses.values()  # <- some of these may not exist
maildirs = uniq(maildirs_on_disk + maildirs_in_file)
mailboxes = dict((d, mailbox.Maildir(os.path.join(BASE_MAILDIR, d), create=True))
                 for d in maildirs)


# N.B.: the order of the following filters matters. note the return
# statements. this short-circuiting is desirable, but has to be done
# carefully to avoid double-booking mails.
def filter(args):
    try:
        key, message = args
        # BLACKLISTED WORDS/PHRASES
        if not message.is_multipart():
            # Can't run blacklist logic against multipart messages
            # because random phrases such as "gucci" may show up in
            # base64-encoded strings ... and I'm too lazy to write a

def numberOfPrimeFactors(n):
    return len(uniq(primeFactors(n)))

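# `primeFactors` is not defined anywhere in this collection. For illustration only,
# here is a trial-division sketch under the assumption that it returns the prime
# factors with multiplicity; with it, numberOfPrimeFactors(12) == 2, since
# 12 = 2 * 2 * 3 has two distinct prime factors.
def primeFactors(n):
    """Return the prime factors of n with multiplicity, e.g. 12 -> [2, 2, 3]."""
    factors = []
    d = 2
    while d * d <= n:
        while n % d == 0:
            factors.append(d)
            n //= d
        d += 1
    if n > 1:
        factors.append(n)
    return factors
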
import os
import mailbox
from glob import iglob

# procmail-py - Email content and spam filtering
# MIT License
# © 2014 Noah K. Tilton <*****@*****.**>

from config import BASE_MAILDIR, MY_DOMAINS, addresses, mark_read
from spam import spamc, blacklisted
from utils import file, spammy_spamc, mark_as_read, uniq

INBOXDIR = os.path.join(BASE_MAILDIR, "INBOX")

maildirs_on_disk = [os.path.basename(dir)
                    for dir in iglob(os.path.join(BASE_MAILDIR, "*"))]
maildirs_in_file = addresses.values()  # <- some of these may not exist
maildirs = uniq(maildirs_on_disk + maildirs_in_file)
mailboxes = dict((d, mailbox.Maildir(os.path.join(BASE_MAILDIR, d), create=True))
                 for d in maildirs)


# N.B.: the order of the following filters matters. note the return
# statements. this short-circuiting is desirable, but has to be done
# carefully to avoid double-booking mails.
def mfilter(args):
    try:
        key, message = args
        # BLACKLISTED WORDS/PHRASES
        if not message.is_multipart():
            # Can't run blacklist logic against multipart messages

#!/usr/bin/python
# ADIC client program
#
# usage is the same as for "adiC" with additional
# required option "--username", e.g.:
#     adic_client.py --username=nobody -vd gradient f.c
#
import sys, glob, socket

from ADIC import ADIC_Client
from utils import uniq, get_username, include_files, get_server, string

adic = ADIC_Client()
username, args = get_username(sys.argv[1:])
options, files = adic.check_options(args)
files = reduce(lambda x, y: x + y, map(glob.glob, files), [])   # expand unix wildcards
files = uniq(include_files(files, adic.LanguageClass))          # add include files

try:
    host, port = get_server("r_adic")
except socket.error:
    sys.exit(1)
else:
    print adic.submit_request(host, port, username, string.join(options), files)

def collect(options={}):
    # landing page with links to all guest prayers
    page = fromstring(download('http://chaplain.house.gov/chaplaincy/guest_chaplains.html'))
    links = uniq(page.xpath("//td/a/@href"))

    limit = options.get("limit", False)
    if limit:
        links = links[:limit]

    for link in links:
        try:
            uid = link.split('id=')[1]
        except Exception, e:
            print e
            continue

        html = fromstring(download('http://chaplain.house.gov/chaplaincy/' + link, uid + '.html'))
        info = {}
        info['name'] = html.xpath("//h2/text()")[0]

        # get h3 pairings, guess whether a church is listed based on number of hits
        hs = html.xpath("//h3/text()")
        if len(hs) > 1:
            info['church'] = hs[0].strip()
            info['location'] = hs[1].strip()
        else:
            info['location'] = hs[0].strip()

        # get boldface pairings
        for pair in html.xpath('//strong'):
            if pair.tail:
                label, data = pair.text.replace(':', '').strip(), pair.tail.strip()
                info[label.lower().split(" ")[0]] = data
            elif pair.getnext().tag == 'a':
                label, data = pair.text.replace(':', '').strip(), pair.getnext().xpath("text()")[0].strip()
                info[label.lower().split(" ")[0]] = data

        # add h4/p pairings
        for pair in html.xpath('//h4'):
            if pair.getnext().tag == 'p':
                label, data = pair.text.replace(':', '').strip(), '\n'.join([x.strip() for x in pair.getnext().xpath("text()")])
                info[label.lower().split(" ")[0]] = data

        if "one" in info:
            info["introduction"] = info["one"]
            info.pop("one")

        # sessions
        info["session"] = int(math.floor((int(info["date"].split("/")[-1]) - 1789) / 2) + 1)
        info['uid'] = uid
        info['member'] = {}

        # get bioguide match for sponsor
        if 'sponsor' in info:
            # fix a recurring typo on House Chaplain website
            info['member'] = {}
            info['sponsor'] = info['sponsor'].replace("Rep. Rep.", "Rep.")
            pieces = re.search("\s(.+?), \(([A-Z])-([A-Z]{2})\)", info['sponsor']).groups()
            info['member']['name'] = pieces[0]
            info['member']['party'] = pieces[1]
            info['member']['state'] = pieces[2]

            member_info = lookup(info['member']['name'], info['session'], info['member']['state'], 'house')
            if member_info['status'] == 'Found':
                # use name info from API instead since it's more canonical
                if not member_info['middle_name']:
                    member_info['middle_name'] = ''
                info['member']['name'] = member_info["first_name"] + " " + member_info['middle_name'] + " " + member_info['last_name']
                info['member']['name'] = info['member']['name'].replace("  ", " ")
                info['member']['state'] = member_info["state"]
                info['member']['bioguide'] = member_info['id']
            else:
                print member_info['status'], info['member']['name']
                print "Unable to find %s (%d) in the NYT API" % (info['sponsor'], info['session'])
                info['member']['bioguide'] = None

            info.pop("sponsor")

        write(json.dumps(info, indent=2), os.getcwd() + "/data/" + uid + ".json")