def _init_users():
    """ Initialize user data. """
    for status in User.STATUS:
        db.add(UserStatus(status.value, titleize(status.name)))
    db.flush()

    for permission in PERMISSION:
        db.add(Permission(permission.value, pluralize(titleize(permission.name))))
    db.flush()

    # user records
    users_stream = pkg_resources.resource_stream('dx.orm', os.path.join('data', 'users.tsv'))
    for row in csv.DictReader(users_stream, delimiter='\t'):
        user = User(row['email'], app.config['DEFAULT_PASSWORD'], row['name'])
        user.status_id = int(User.STATUS.ACTIVE)
        user.confirmed = True
        user.force_password_reset = True
        user.bio = UserBio()
        if row['permissions']:
            for permission_id in row['permissions'].split(','):
                user.permissions[permission_id] = Permission.query.get(permission_id)
        db.add(user)
    db.flush()

    db.add(Project('test', 'Test', User.query.first()))
    db.flush()
    log.info('user data initialized')
def get_prettify_funcs():
    return {
        'number': lambda v: locale.format('%d', v, grouping=True),
        'dollars_cents': lambda v: locale.currency(v, grouping=True),
        'name': lambda v: titleize(v),
        'city_state': lambda v: ','.join([titleize(v.split(',')[0]), v.split(',')[1]]),
    }
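# A minimal usage sketch of the prettify table above, assuming an en_US
# locale is available; note that locale.format() was deprecated in
# Python 3.7 in favor of locale.format_string().
import locale
import inflection

locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
print(locale.format_string('%d', 1234567, grouping=True))  # '1,234,567'
print(inflection.titleize('jane_doe'))                     # 'Jane Doe'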
def pop_medals(conn, pp):
    with open(MEDAL_DATA) as csvfile:
        # conn.execute("TRUNCATE nocs CASCADE;")
        conn.execute("TRUNCATE medals CASCADE;")
        conn.execute("ALTER SEQUENCE medals_id_seq RESTART WITH 1;")
        reader = csv.DictReader(csvfile)
        for row in reader:
            last_name = row['Athlete'].split(', ')[0].replace("'", "''")
            first_name = row['Athlete'].split(', ')[1].replace("'", "''") if len(row['Athlete'].split(', ')) == 2 else ""
            year = int(row['Edition'])
            if year >= 1968 and row['Season'] == 'Summer':
                sql = """with event as (select id from events where name = '{0}' and sport_id = {1} and gender = '{2}'),
                    athlete as (select id from athletes where first_name = '{3}' and last_name = '{4}'),
                    country as (select id from countries where noc = '{5}'),
                    olympic as (select id from olympics where year = {6} and season = '{7}')
                    INSERT INTO medals (event_id, athlete_id, olympic_id, country_id, rank)
                    (SELECT event.id, athlete.id, olympic.id, country.id, '{8}' from event, athlete, olympic, country)
                    returning id;
                """.format(row['Event'], sports_dict[row['SportID']], row['Gender'],
                           inflection.titleize(first_name), inflection.titleize(last_name),
                           row['NOC'], row['Edition'], row['Season'], row['Medal'])
                ret = conn.execute(sql)
def __next__(self):
    while True:
        linenum, line = next(self._voters_iter)
        line = line.strip()

        if not line:
            log.warning(
                'PADRON_COMPLETO.txt :: Ignoring empty line #{}'.format(linenum)
            )
            continue

        try:
            # Parse line
            parts = line.split(',')
            assert len(parts) == 8
            parsed = {
                'id': int(parts[0]),
                'district': int(parts[1]),
                'sex': int(parts[2]),
                'expiration': datetime.strptime(parts[3], '%Y%m%d').date(),
                'site': int(parts[4]),
                'name': titleize(parts[5].strip()),
                'family_name_1': titleize(parts[6].strip()),
                'family_name_2': titleize(parts[7].strip()),
            }

            # Validate district code
            district_code = parsed['district']
            district_key = (
                district_code // 100000,
                (district_code % 100000) // 1000,
                district_code % 1000
            )
            assert district_key in self._distelec.districts

            # FIXME: Shall we perform some other assert here to validate data?

            # Include sample if not present
            if parsed['district'] not in self.samples:
                self.samples[parsed['district']] = parsed['id']

            return parsed

        except Exception:
            self._bad_data.append(linenum)
            log.error(
                'PADRON_COMPLETO.txt :: Bad data at line #{}:\n{}'.format(
                    linenum, line
                )
            )
            log.debug(format_exc())
            continue
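# A short sketch of how the six-digit district code above decomposes into the
# (province, canton, district) key; the code 123456 is illustrative only.
district_code = 123456
district_key = (
    district_code // 100000,           # province: 1
    (district_code % 100000) // 1000,  # canton: 23
    district_code % 1000,              # district: 456
)
assert district_key == (1, 23, 456)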
def title(self):
    if self.is_index_page():
        subdir = posixpath.split(posixpath.dirname(self.name))[-1]
        if subdir == "/":
            title_from_name = "Home"
        else:
            title_from_name = inflection.titleize(subdir)
    else:
        title_from_name = inflection.titleize(self.baserootname())
    return self.setting('title') or title_from_name
def __count_by_keys(self, df, name="no_name", key="mean"):
    gk = "{}_gnd".format(key[:-4])
    binn = self.__counting(df, key, gk)
    binn.update(name=name, key=Inf.titleize(key))
    if self.preview:
        print(binn)
    return binn
def display(self):
    """
    Returns the display name for this table.

    :return <str>
    """
    return self.__display or inflection.titleize(self.__name)
def update_or_create_associations(self, behavior):
    # Update/Create template_feature_behavior and set it.
    # This is done after the update because the template references back to the Behavior.
    self.template_feature_behavior = self.update_or_create_template_feature_behavior(
        behavior._template_feature_behavior)
    self.name = titleize(BehaviorKey.Fab.remove(self.key))
    self.save()

    # Handle the manys that were embedded in the instance constructor.
    # _parents are always configured without the BehaviorKey prefix.
    if behavior._parents:
        self.parents.clear()
        self.parents.add(*Behavior.objects.filter(
            key__in=map(lambda parent: BehaviorKey.Fab.ricate(parent), behavior._parents)))
    if behavior._tags:
        self.tags.clear()
        self.tags.add(*map(lambda tag: Tag.objects.update_or_create(tag=tag.tag)[0], behavior._tags))

    # Add a default tag that matches the key of the Behavior
    tag_key = BehaviorKey.Fab.remove(self.key)
    if not self.tags.filter(tag=tag_key):
        self.tags.add(Tag.objects.update_or_create(tag=tag_key)[0])

    # Inherit the intersection from the first parent if we don't have one.
    # Default to a polygon to polygon GeographicIntersection.
    intersection = behavior._intersection or \
        (self.parents.all()[0].intersection_subclassed if
         self.parents.count() == 1 else
         GeographicIntersection.polygon_to_polygon)
    if not intersection:
        raise Exception("All behaviors must have a default intersection")

    # Use all attributes to find/create the Intersection instance, including nulls.
    # Behaviors can't have specific Intersection properties, as FeatureBehaviors can, so it's safe to
    # share Intersection instances among them. The Intersection of a Behavior is really just the template
    # for that of a FeatureBehavior.
    self.intersection = intersection.__class__.objects.get_or_create(**model_dict(intersection))[0]
    self.save()
def bridgemem_details(self):
    """
    :return: list vlans or bridge names of various stp states MODIFY
    """
    if not self.iface.is_bridgemem():
        return None
    # check if port is in STP
    _str = ''
    _stpstate = self.iface.stp.state
    # get the list of states by grabbing all the keys
    if self.iface.vlan_filtering:
        _vlanlist = self.iface.vlan_list
        _header = [_("all vlans on l2 port")]
        _table = [[', '.join(linux_common.create_range('', _vlanlist))]]
        _str += tabulate(_table, _header, numalign='left') + self.new_line()
        _header = [_("untagged vlans")]
        _table = [[', '.join(self.iface.native_vlan)]]
        _str += tabulate(_table, _header, numalign='left') + self.new_line()
    for _state, _bridgelist in _stpstate.items():
        if _bridgelist:
            _header = [_("vlans in %s state") % (inflection.titleize(_state))]
            # if vlan aware and bridgelist is not empty, then assume
            # all vlans have that stp state
            if self.iface.vlan_filtering:
                _table = [[', '.join(linux_common.create_range('', _vlanlist))]]
            else:
                _table = [self._pretty_vlanlist(_bridgelist)]
            _str += tabulate(_table, _header, numalign='left') + self.new_line()
    return _str
def prettify_header(column):
    column = str(column)
    for c in get_prettify_funcs().keys():
        column = column.replace(c, '')
    column = titleize(column)
    column = column.strip()
    return column
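# Sketch of prettify_header() on a made-up column name: the 'dollars_cents'
# marker is stripped, the remainder titleized, and the result trimmed.
import inflection

column = 'total_dollars_cents'.replace('dollars_cents', '')  # 'total_'
print(inflection.titleize(column).strip())                   # 'Total'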
def randomTrendTweet():
    trends = getTrends()
    if len(trends) == 0:
        sys.stderr.write("Couldn't get any trending topics. :-/\n")
        return
    trend = random.choice(trends)
    if trend[0] == "#":
        text = trend[1:]
        text = inflection.titleize(text)
        text = titlecase.titlecase(text)
    else:
        text = trend
    image = SimulatorGeneratorImage.getImageFor(
        text,
        safeSearchLevel=config.get("services", "google_safesearch"),
        referer="http://twitter.com/SimGenerator"
    )
    year = random.randint(config.getint("settings", "minyear"), datetime.date.today().year)
    artFile = "output-%s.png" % datetime.datetime.now().strftime("%Y-%m-%d-%H%M.%f")
    artFile = os.path.join(tempfile.gettempdir(), artFile)
    SimulatorGeneratorImage.createBoxArt(
        text, year, image, artFile,
        maxSize=(
            str(twitterGlobalConfig["photo_sizes"]["large"]["w"] - 1),
            str(twitterGlobalConfig["photo_sizes"]["large"]["h"] - 1),
        ),
        deleteInputFile=True,
    )
    tweetString = text
    if trend[0] == "#":
        tweetString = trend + " " + tweetString
    tweet(tweetString, year, artFile)
def _get_title(self):
    h2 = Helper2()
    commit_comments = h2.commit_comments()
    if len(commit_comments) == 1:
        return commit_comments[0]
    else:
        return inflection.titleize(h2.current_branch())
def display(self):
    """
    Returns the display text for this column.

    :return <str>
    """
    return self.__display or inflection.titleize(self.__name)
def __init__(self, name, text, title=None, image=None):
    self.name = name
    self.text = text
    self.image = image
    self.obstacle = None
    self.title = title
    if not self.title:
        self.title = inflection.titleize(name)
def generate_model_name(raml_resource):
    """ Generate model name.

    :param raml_resource: Instance of ramlfications.raml.ResourceNode.
    """
    resource_uri = get_resource_uri(raml_resource).strip('/')
    resource_uri = re.sub(r'\W', ' ', resource_uri)
    model_name = inflection.titleize(resource_uri)
    return inflection.singularize(model_name).replace(' ', '')
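# Illustration with a made-up URI: a '/stories' resource node maps to the
# model name 'Story' (titleize, singularize, then strip spaces).
import inflection

name = inflection.singularize(inflection.titleize('stories')).replace(' ', '')
assert name == 'Story'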
def title(self):
    """ Canonical title of document.

    Tries to guess from document name if `title` setting not provided.
    """
    if self.setting('title'):
        return self.setting('title')

    if self.is_index_page():
        subdir = posixpath.split(posixpath.dirname(self.name))[-1]
        if subdir == "/":
            return "Home"
        elif subdir:
            return inflection.titleize(subdir)
        else:
            return inflection.titleize(self.baserootname())
    else:
        return inflection.titleize(self.baserootname())
def edit_user():
    form = EditUserForm()
    user = current_user
    if form.validate_on_submit():
        user.name.first = re.sub('<[^>]*>', '', request.form['first_name'])
        user.name.last = re.sub('<[^>]*>', '', request.form['last_name'])
        user.name.full = titleize(user.name.first + ' ' + user.name.last)
        user.save()
        return redirect(url_for('user', user_id=user._id))
    return render_template('edit_user.html', user=user)
def run(self):
    date_style = xlwt.XFStyle()
    date_style.num_format_str = self.setting('date-format-string')

    bold_font = xlwt.Font()
    bold_font.bold = True
    bold_style = xlwt.XFStyle()
    bold_style.font = bold_font

    filename = self.setting('filename')
    if os.path.exists(filename):
        os.remove(filename)

    workbook = xlwt.Workbook()
    ws = workbook.add_sheet("Articles")
    keys = self.setting('fields')

    # Write Headers
    for j, k in enumerate(keys):
        heading = inflection.titleize(k.replace("_id", "_identifier"))
        ws.write(0, j, heading, bold_style)

    for i, article in enumerate(Article.select()):
        print article
        for j, key in enumerate(keys):
            if key.startswith("journal."):
                value = getattr(article.journal, key.replace("journal.", ""))
            elif key.startswith("publisher."):
                if article.journal.publisher is not None:
                    value = getattr(article.journal.publisher, key.replace("publisher.", ""))
                else:
                    value = None
            else:
                value = getattr(article, key)

            fmt = None
            if isinstance(value, ModelBase):
                value = unicode(value)
            elif isinstance(value, datetime.date):
                fmt = date_style
            elif inspect.ismethod(value):
                value = value()
            else:
                pass

            if fmt:
                ws.write(i + 1, j, value, fmt)
            else:
                ws.write(i + 1, j, value)

    workbook.save(filename)
    print " pubspeed report written to %s" % filename
def pop_athletes(conn, pp):
    with open(MEDAL_DATA) as csvfile:
        reader = csv.DictReader(csvfile)
        conn.execute("TRUNCATE athletes CASCADE;")
        conn.execute("ALTER SEQUENCE athletes_id_seq RESTART WITH 1;")
        for row in reader:
            last_name = row['Athlete'].split(', ')[0].replace("'", "''")
            first_name = row['Athlete'].split(', ')[1].replace("'", "''") if len(row['Athlete'].split(', ')) == 2 else ""
            sql = """SELECT id FROM athletes
                     WHERE first_name = '{0}' AND last_name = '{1}';""".format(
                inflection.titleize(first_name), inflection.titleize(last_name))
            res = conn.execute(sql)
            ret = res.first()
            if ret is None:
                sql = """INSERT INTO athletes (first_name, last_name, gender)
                         VALUES ('{0}','{1}','{2}');""".format(
                    inflection.titleize(first_name), inflection.titleize(last_name), row['Gender'])
                res = conn.execute(sql)
def find_handler(self, message, bound_handlers):
    """ Find the handler for a message. """
    try:
        handler = self.sqs_message_handler_registry.find(message.media_type, bound_handlers)
        self.opaque["handler"] = titleize(handler.__class__.__name__)
        return handler
    except KeyError:
        raise IgnoreMessage(f"No handler was registered for: {message.media_type}")
def mangle(original):
    title = original.istitle()
    human = inflection.humanize(original) == original
    mangled = ' '.join(map(mangle_word, original.split()))
    if title:
        return inflection.titleize(mangled)
    if human:
        return inflection.humanize(mangled)
    return mangled
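# How the two casing checks above behave, using inflection directly; the
# sample words are arbitrary. humanize() is idempotent on already-humanized
# text, which is what the `human` test relies on.
import inflection

assert inflection.humanize('Employee salary') == 'Employee salary'
assert inflection.titleize('olleh dlrow') == 'Olleh Dlrow'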
def config(self):
    file_name = os.path.basename(sys.argv[0])
    plugin_name = os.path.splitext(file_name)[0]
    title = inflection.titleize(plugin_name)
    print('graph_title ' + title)
    print('graph_category ' + self.category)
    print('graph_vlabel ' + self.vlabel)
    for field in self.fields:
        if isinstance(field, six.string_types):
            field = Field(field)
        field.config()
def _config():
    file_name = os.path.basename(sys.argv[0])
    plugin_name = os.path.splitext(file_name)[0]
    title = inflection.titleize(plugin_name)
    plugin = PluginProxy(auto_evaluate=True)
    print('graph_title ' + title)
    print('graph_category ' + plugin.category)
    print('graph_vlabel ' + plugin.vlabel)
    for field in plugin.fields:
        if isinstance(field, six.string_types):
            field = Field(field)
        field.config()
def matches_conditions(self, row_obj, **expected_dict):
    """Returns whether tree item matches conditions.

    Conditions are evaluated similarly to how `obj_dict` is built.
    """
    for attr_name, expected_value in expected_dict.iteritems():
        if hasattr(row_obj, attr_name):
            actual_value = getattr(row_obj, attr_name)  # should be a property
        else:
            header_name = inflection.titleize(attr_name)
            actual_value = self.text_for_header(header_name)
        if actual_value != expected_value:
            return False
    return True
def ports_of_some_kind_of_state(self, statename):
    _header = [_("ports in %s state") % (inflection.titleize(statename))]
    _table = []
    _portlist = [_x.name for _x in self.iface.stp.member_state.get(statename)]
    if _portlist:
        _table2 = []
        self.print_portlist_in_chunks(_portlist, '', _table2)
        for i in _table2:
            _table.append([i])
        return tabulate(_table, _header) + self.new_line()
    return ''
def db_entity_defaults(db_entity, config_entity=None):
    # Instantiate a FeatureClassCreator to make the FeatureClassConfiguration
    feature_class_creator = FeatureClassCreator(config_entity, db_entity, no_ensure=True)
    if config_entity:
        # Find the database of the configured client
        connection = resolve_fixture(None, "init", InitFixture, config_entity.schema()).import_database()
        # Strip the test_ prefix when running unit tests. Until we can auto-populate the _test version
        # of the database, we want to simply rely on the manually configured source database
        if connection:
            connection['database'] = connection['database'].replace('test_', '')
    else:
        # No config_entity abstract DbEntity case
        connection = None

    return dict(
        # The name is passed in or the titleized version of key
        name=db_entity.name or titleize(db_entity.key),
        # Postgres URL for local sources, or possibly a remote url (e.g. for background layer sources).
        # Unless overridden, create the url according to the standard postgres url scheme
        # (the original string had the separators garbled; user:password@host:port/database is intended)
        url=db_entity.url or
            ('postgres://{user}:{password}@{host}:{port}/{database}'.format(
                **merge(dict(port=5432), connection)) if connection else None),
        # Normally equals the key, except for views of the table, like a Result DbEntity.
        # Views leave this null and rely on query
        table=db_entity.table or (db_entity.key if not db_entity.query else None),
        # Query to create a "view" of the underlying data. Used by Result DbEntity instances
        query=db_entity.query,
        # How to group the features or query results. Not yet well hashed out
        group_by=db_entity.group_by,
        # The source DbEntity key if this DbEntity resulted from cloning a peer DbEntity
        source_db_entity_key=db_entity.source_db_entity_key,
        # Array used by remote data sources whose URLs have different host names.
        # If so then the url will have a string variable for the host
        hosts=db_entity.hosts,
        # The User who created the DbEntity. TODO. Default should be an admin
        creator=db_entity.creator if hasattr(db_entity, 'creator') else get_user_model().objects.filter()[0],
        # The User who updated the DbEntity. TODO. Default should be an admin
        updater=db_entity.creator if hasattr(db_entity, 'creator') else get_user_model().objects.filter()[0],
        # The SRID of the Feature table
        srid=db_entity.srid,
        # This is a non-model object, so it is saved as a PickledObjectField.
        # Whether the same instance is returned or not does not matter.
        # If db_entity.feature_class_configuration is None, it will return None
        feature_class_configuration=feature_class_creator.complete_or_create_feature_class_configuration(
            db_entity.feature_class_configuration
        ),
        no_feature_class_configuration=db_entity.no_feature_class_configuration
        # feature_behavior is handled internally by DbEntity
    )
def organization_index(request, organization_type_slug=None, query_string=None):
    organization_type = None
    clean_index = False

    if organization_type_slug:
        organization_type = titleize(organization_type_slug).capitalize()
        organizations = Organization.objects.filter(organization_type=organization_type)
    else:
        clean_index = True
        organizations = Organization.objects.all()

    organizations = organizations.order_by('full_name')

    if request.method == 'POST':
        form = OrganizationSearchForm(request.POST)
        if form.is_valid():
            query_string = form.cleaned_data['text']
            return HttpResponseRedirect(
                reverse('view_organization_query', kwargs={'query_string': query_string}))
    else:
        form = OrganizationSearchForm()

    if query_string:
        query = slugify(query_string)
        orgs = []
        for organization in organizations:
            if query in slugify(organization.full_name):
                orgs.append(organization)
        organizations = orgs
        clean_index = False

    organizations_length = len(organizations)

    # dictionary to be returned in render_to_response()
    return_dict = {
        'clean_index': clean_index,
        'form': form,
        'organization_type': organization_type,
        'organizations': organizations,
        'organizations_length': organizations_length,
        'query_string': query_string,
    }

    return render_to_response("organizations/index.html", return_dict,
                              context_instance=RequestContext(request))
def onMatch(self, token, key, isCatch, tokens):
    isMatch = False
    match = re.search(self.dict[key][self.regex], token, flags=re.IGNORECASE)
    if match:
        token = re.sub(self.dict[key][self.regex], self.dict[key][self.replace], token)
        if token:
            token = inflection.titleize(token)
            token = token.lower()
        isCatch = False
        isMatch = True
    return isCatch, token, isMatch
def projects_number_of_projects(request):
    geographical_scopes_by_id = {}
    for geographical_scope in GeographicalScope.objects.all():
        geographical_scopes_by_id[geographical_scope.id] = geographical_scope.name

    fundings = Funding.objects.all().select_related('project', 'funding_program')

    projects_data = defaultdict(lambda: defaultdict(set))
    scopes = set()
    for funding in fundings:
        for year in range(funding.project.start_year, funding.project.end_year + 1):
            projects_data[year][funding.project_id].add(
                geographical_scopes_by_id[funding.funding_program.geographical_scope_id])
            scopes.add(geographical_scopes_by_id[funding.funding_program.geographical_scope_id])

    years_range = sorted(projects_data.keys())

    default_project_dict = {}
    for scope in scopes:
        default_project_dict[scope] = OrderedDict()
        for year in years_range:
            default_project_dict[scope][year] = 0

    for year in years_range:
        for project_id in projects_data.get(year, []):
            for scope in projects_data[year][project_id]:
                default_project_dict[scope][year] += 1

    project_counts = []
    for scope, value_dict in default_project_dict.iteritems():
        item_dict = {
            'key': str(inflection.titleize(scope)),
            'values': []
        }
        for year, count in value_dict.iteritems():
            item_dict['values'].append({'x': year, 'y': count})
        project_counts.append(item_dict)

    return_dict = {
        'web_title': u'Number of projects',
        'project_counts': project_counts,
        'years_range': years_range,
    }

    return render(request, "charts/projects/number_of_projects.html", return_dict)
def make_columns(df):
    ''' makes column schemas '''
    schema = []
    columnNames = list(df.columns)
    names = [inflection.titleize(k) for k in columnNames]
    for i in range(len(columnNames)):
        col = {}
        col['fieldName'] = columnNames[i]
        col['name'] = names[i]
        col['dataTypeName'] = None
        schema.append(col)
    schema = pd.DataFrame(schema)
    return schema
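# Usage sketch with an invented two-column frame; titleize() turns snake_case
# field names into display names.
import pandas as pd

df = pd.DataFrame({'first_name': ['Ada'], 'zip_code': ['02139']})
print(make_columns(df))
# prints, roughly:
#     fieldName        name dataTypeName
# 0  first_name  First Name         None
# 1    zip_code    Zip Code         None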
def generate_modules(packagedir, specification):
    """Create the basic Python modules in the `_raw` package, and ensure the
    accompanying modules in the `service` package exist.

    The output directory argument should be the location of the existing
    'src/flyingcircus/' directory, where generated files will be placed.
    Existing files will be replaced.
    """
    # Check inputs
    raw_dirname = os.path.join(packagedir, "_raw")
    assert os.path.isdir(raw_dirname), "The 'flyingcircus._raw' package does not exist"
    service_dirname = os.path.join(packagedir, "service")
    assert os.path.isdir(service_dirname), "The 'flyingcircus.service' package does not exist"

    # Load data as a python dictionary
    with open(specification, "r") as fp:
        all_data = json.load(fp)
    assert set(all_data.keys()) == {"PropertyTypes", "ResourceSpecificationVersion", "ResourceTypes"}, \
        "Found an unknown top-level key"

    # Group the data by AWS service and calculate additional data
    services = {}
    for resource_type, resource_data in all_data["ResourceTypes"].items():
        # Break down the resource type
        assert resource_type.count("::") == 2, "Fully qualified resource type should have 3 components"
        provider_name, service_name, resource_name = resource_type.split("::")
        assert provider_name == "AWS", "Resource provider is expected to be AWS"

        if service_name not in SUPPORTED_AWS_SERVICES:
            LOGGER.info("Skipping '%s' because we don't yet support that service", resource_type)
            continue

        # Ensure the service data exists
        service = services.setdefault(
            service_name,
            {
                "documentation": {"url": SERVICE_DOCUMENTATION_URLS.get(service_name)},
                "module_name": service_name.lower(),
                "name": service_name,
                "resources": {},
            })

        # Augment the resource data and add it to the service
        assert resource_name not in service["resources"], "resource type is defined twice"
        service["resources"][resource_name] = resource_data

        resource_data.update({
            "extra_aws_attributes": RESOURCES_WITH_EXTRA_ATTRIBUTES.get(resource_type, []),
            "friendly_name": inflection.titleize(resource_name),
            "type": {
                "fullname": resource_type,
            },
        })
        assert not set(resource_data["extra_aws_attributes"]).difference({"CreationPolicy", "UpdatePolicy"}), \
            "We have defined extra AWS attributes that are not handled by the class constructor in the Jinja template"

    # TODO collect property types in the same way
    # TODO verify that a property doesn't have the same name as a resource (nor an existing property)

    # Create a Python module for each AWS service, with a Python class for
    # each AWS resource type
    env = Environment(
        loader=FileSystemLoader(SCRIPTDIR),  # TODO put our template into a "standard" location for Jinja
    )
    raw_template = env.get_template('raw_module.py.jinja2')
    service_template = env.get_template('service_module.py.jinja2')

    for service_name, service in sorted(services.items()):
        if not service["documentation"]["url"]:
            LOGGER.warning("Service %s does not have a documentation URL configured", service_name)

        # Create or update the "raw" python module
        raw_module_name = os.path.join(raw_dirname, service["module_name"] + ".py")
        with open(raw_module_name, "w") as fp:
            LOGGER.debug("Generating raw module %s.py", service_name.lower())
            fp.write(raw_template.render(service=service))

        # Ensure that the "service" module exists, and pre-populate it with
        # the boilerplate if it doesn't
        service_module_name = os.path.join(service_dirname, service["module_name"] + ".py")
        if not os.path.exists(service_module_name):
            with open(service_module_name, "w") as fp:
                LOGGER.debug("Generating service module %s.py", service_name.lower())
                fp.write(service_template.render(service=service))
def builder(sources, document, etable=None, erows=None):
    """
    Build report html by adding tables for sources data

    :param sources: Main sources for tables
    :param document: Main HTML document
    :param etable: Reference table
    :param erows: Reference row
    :return:
    """
    for element, value in sources.copy().items():
        titles = titleize(humanize(element))
        if etable and not isinstance(sources[element], list) and not erows:
            with etable:
                trow = tr()
                if not isinstance(value, dict):
                    trow.add(th(titles))
                    trow.add(td(value))
        elif erows:
            with erows:
                if element == value:
                    # Heading
                    erows.add(th(titles))
                else:
                    erows.add(td(value))
        elif not erows and not etable and isinstance(value, str):
            with document:
                with table(id=f'table_{element}', _class="clearfix").add(tbody()) as etable:
                    etable.add(div(_class="pointer"))
                    etable.add(h3(TEXT_GENERAL))  # Heading
                    xrow = tr()
                    xrow.add(th(titles))
                    xrow.add(td(value))

        # If element is dict inspect each key value and clear useless keys
        if isinstance(sources[element], dict):
            with document:
                # div(_class="pointer")
                h3(titles)
                with table(id=f'table_{element}', _class="clearfix").add(tbody()) as tables:
                    builder(sources[element], document, tables)

        # If element is list inspect each key value and clear useless keys
        if isinstance(sources[element], list):
            # How many elements this list has
            size = len(sources[element])
            with document:
                # Make a table
                with table(id=f'table_{element}', _class="clearfix",
                           style="page-break-inside: avoid; margin-top: 2rem;").add(tbody()) as tables:
                    # Add headings
                    with tr(id=f'row_heading_{element}_heading'):
                        columns = 1
                        with suppress(Exception):
                            columns = len(next(iter(sources[element])).keys())
                        with td(colspan=columns) as heading:
                            heading.add(h3(f'{titles} - {size}', _class="clearfix"))
                    # Get each element value
                    for index, elements in enumerate(sources[element]):
                        if isinstance(elements, dict):
                            sources[element][index] = OrderedDict(elements)
                            if index == 0:
                                headings = {}
                                for part in elements.keys():
                                    headings.update({part: part})
                                # Add headings
                                with tr(id=f'row_heading_{element}_{index}') as r:
                                    builder(headings, document, tables, r)
                            # Add rows
                            with tr(id=f'row_{element}_{index}') as rows:
                                builder(elements, document, tables, rows)
    return sources
import requests
from bs4 import BeautifulSoup
import inflection

URL = 'http://www.dailysmarty.com/topics/python/'

page = requests.get(URL)
soup = BeautifulSoup(page.content, 'lxml')

for post in soup.find_all(class_='post-link-title'):
    for title_wrapper in post.find_all('h2'):
        title_in_text = f'h2-----the title should be the following => {title_wrapper.text} <='
        # print(title_in_text)

        links = []
        # Loop variable renamed: it previously shadowed the `links` list,
        # so links.append() was called on a Tag and raised at runtime.
        for anchor in title_wrapper('a'):
            links.append(anchor.get('href'))
            # print(f'Link..............{anchor}')

        for title_url in title_wrapper.find_all('a', href=True):
            url = title_url['href']
            length = len(title_url['href'])
            unformatted_title = title_url['href'][7:]
            # print(url)
            # print(length)
            # print(unformatted_title)
            print(inflection.titleize(unformatted_title))
        '1': [(0, 1032, 4), (0, 1024, 1)],
        '2': [(1, 1032, 4), (0, 1024, 1)],
        '3': [(2, 1032, 4), (0, 1024, 1)],
        '4': [(3, 1032, 4), (0, 1024, 1)]
    }
}

# Defines the possible anomalies to flag through the web app
ANOMALIES = [
    'snowball', 'cosmic_ray_shower', 'crosstalk', 'data_transfer_error',
    'diffraction_spike', 'excessive_saturation', 'ghost', 'guidestar_failure',
    'persistence', 'satellite_trail', 'other'
]

# Defines the possible anomalies (with rendered name) to flag through the web app
ANOMALY_CHOICES = [(anomaly, inflection.titleize(anomaly)) for anomaly in ANOMALIES]

FOUR_AMP_SUBARRAYS = ['WFSS128R', 'WFSS64R', 'WFSS128C', 'WFSS64C']

# Possible suffix types for nominal files
GENERIC_SUFFIX_TYPES = [
    'uncal', 'cal', 'rateints', 'rate', 'trapsfilled', 'i2d', 'x1dints',
    'x1d', 's2d', 's3d', 'dark', 'crfints', 'crf', 'ramp', 'fitopt',
    'bsubints', 'bsub', 'cat'
]

# Possible suffix types for guider exposures
GUIDER_SUFFIX_TYPES = [
    'stream', 'stacked_uncal', 'image_uncal', 'stacked_cal', 'image_cal'
]
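# For reference, the comprehension above yields Django-style (value, label)
# choice pairs, e.g.:
import inflection
assert inflection.titleize('cosmic_ray_shower') == 'Cosmic Ray Shower'
# so ANOMALY_CHOICES starts [('snowball', 'Snowball'),
#                            ('cosmic_ray_shower', 'Cosmic Ray Shower'), ...]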
def get_package_dict(self, iso_values, harvest_object):
    '''
    '''
    package_dict = SpatialHarvester.get_package_dict(self, iso_values, harvest_object)

    simple_keys = {
        'publisher_info',
        'resource-provider',
        'distributor-info',
        'aggregation-info',
        'distributor-formats',
        'additional-information-source',
        'purpose',
        # Constraints
        'use-constraints',
        'access-constraints',
        'use-limitations',
        'fees',
        # lineage
        'lineage',
        'lineage-process-steps',
    }
    extras = {k: iso_values.get(k) for k in simple_keys if k in iso_values}

    keywords = defaultdict(list)
    for keyword in iso_values['keywords']:
        keyword_type = keyword['type'] or 'keywords'
        keywords[keyword_type].append(keyword)
    extras['grouped_keywords'] = []

    for extra_name, matches, data_filter in (
            ('cf_standard_names', ('cf', 'climate and forecast'),
             lambda s: s.strip().split(' ', 1)[0]),
            ('gcmd_keywords', ('gcmd', 'global change'),
             lambda s: s.strip()),
    ):
        try:
            match_raw = next(
                (d['keywords'] for d in iso_values['keywords']
                 if d['thesaurus'] and any(
                     v in d['thesaurus']['title'].lower() for v in matches)),
                None)
            if match_raw is None:
                continue
            elif hasattr(match_raw, '__iter__'):
                match_result = sorted(set(map(data_filter, match_raw)))
            else:
                match_result = data_filter(match_raw)
        except:
            match_result = None
            log.exception(
                "Exception raised when trying to extract {}".format(extra_name))
        if match_result is not None:
            extras[extra_name] = match_result

    for keyword_type in [
            'theme', 'dataCenter', 'platform', 'instrument', 'place',
            'project', 'dataResolution', 'stratum', 'otherRestrictions',
            'keywords'
    ]:
        if keyword_type in keywords:
            extras['grouped_keywords'].append(
                [titleize(keyword_type), keywords[keyword_type]])

    if iso_values.get('publisher', None):
        extras['publisher'] = iso_values.get('publisher', [])

    if iso_values.get('browse-graphic', None):
        browse_graphic = iso_values['browse-graphic'][0]['file']
        extras['browse-graphic'] = browse_graphic

    if iso_values.get('dataset-edition'):
        extras['dataset-edition'] = iso_values['dataset-edition']
        package_dict["version"] = iso_values['dataset-edition'][0]

    if iso_values.get('presentation-form'):
        extras['presentation-form'] = iso_values['presentation-form'][0]

    if iso_values.get('responsible-organisation'):
        log.info("Checking for responsible-organisation")
        extras['responsible-organisation'] = iso_values.get('responsible-organisation', [])

    if iso_values.get('responsible-parties'):
        extras['responsible-parties'] = self.unique_responsible_parties(
            iso_values.get('responsible-organisation', []))

    for item in harvest_object.extras:
        key = item.key
        value = item.value
        if key == 'waf_location':
            extras['waf_location'] = value
            break
    else:
        extras['waf_location'] = None

    extras['object_reference'] = harvest_object.id

    extras_kv = [{
        'key': k,
        'value': json.dumps(v) if isinstance(v, (list, dict)) else v
    } for k, v in extras.items()]

    package_dict['extras'] = package_dict['extras'] + extras_kv
    package_dict['resources'] = self.filter_duplicate_resources(package_dict)
    package_dict['resources'] = self.reorder_resources(package_dict)
    package_dict = self.update_resources(package_dict)

    for resource in package_dict["resources"]:
        if resource["format"] in {"ERDDAP", "ERDDAP-TableDAP", "ERDDAP-GridDAP"}:
            # TODO: try/catch here
            try:
                info_url = re.sub(
                    r"^(https?://.+/erddap/)(?:grid|table)dap(/[^.]+)\.(\w+)$",
                    r"\1info\2/index.csv", resource["url"])
                ds = ErddapCSVMetadataReader(info_url)
                self.get_vertical_extent(ds, package_dict)
                self.get_ioos_nc_attributes(ds, package_dict)
            except:
                pass

    return package_dict
def col(self):
    return titleize(self.text)
import requests
from bs4 import BeautifulSoup
import inflection

URL = 'http://www.dailysmarty.com/topics/python'

page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

results = soup.find_all('div', class_="post-link-title")
for post in results:
    link = post.find('a')['href']
    part = link.partition('/')[2]
    print(inflection.titleize(part.partition('/')[2]))
import re

import requests
from bs4 import BeautifulSoup
import inflection

# Send request; suggest input variable instead of hard code
r = requests.get('http://www.dailysmarty.com/topics/python')

# Parse returned request
webtext = BeautifulSoup(r.text, "html.parser")

# Find links with posts
for link in webtext.find_all(href=re.compile("posts")):
    weburls = link.get('href')
    # Strip the leading /posts/ from the url
    weburls = weburls[7:]
    # Make 'em pretty, print output
    pretty_titles = inflection.titleize(weburls)
    print(pretty_titles)
def publications_number_of_publications(request):
    min_year = 2000
    max_year = datetime.datetime.now().year + 1

    years_range = []
    for year in range(min_year, max_year):
        years_range.append(year)

    default_pub_dict = {}
    totals_by_year = {}

    authored_publications = Publication.objects.all().exclude(authors=None)
    authored_publications = authored_publications.select_related(
        'journalarticle', 'journalarticle__parent_journal')

    publication_types = list(set(authored_publications.values_list('child_type', flat=True)))
    publication_types.append('JCR')

    for pub_type in publication_types:
        if pub_type != 'JCR':
            pub_type = str(inflection.titleize(pub_type))
        default_pub_dict[pub_type] = {}
        for year in range(min_year, max_year):
            year = str(year)
            default_pub_dict[pub_type][year] = 0
            totals_by_year[year] = 0

    for authored_pub in authored_publications:
        pub_type = authored_pub.child_type
        if (pub_type == 'JournalArticle') and (authored_pub.journalarticle.parent_journal.impact_factor):
            pub_type = 'JCR'
        else:
            pub_type = str(inflection.titleize(pub_type))
        pub_year = authored_pub.year
        if pub_year in range(min_year, max_year):
            pub_year = str(pub_year)
            default_pub_dict[pub_type][pub_year] = default_pub_dict[pub_type][pub_year] + 1
            totals_by_year[pub_year] = totals_by_year.get(pub_year, 0) + 1

    publication_counts = []
    pub_types = ['Journal Article', 'JCR', 'Book Section', 'Book', 'Magazine Article', 'Conference Paper']
    publication_counts.append(['Year', 'Total'] + pub_types)

    for year in range(min_year, max_year):
        year = str(year)
        count_row = [year]
        count_row.append(totals_by_year[year])
        for pub_type in pub_types:
            if pub_type in default_pub_dict and year in default_pub_dict[pub_type]:
                row_item = default_pub_dict[pub_type][year]
            else:
                row_item = 0
            count_row.append(row_item)
        publication_counts.append(count_row)

    return_dict = {
        'web_title': u'Number of publications',
        'publication_counts': publication_counts,
        'years_range': years_range,
    }

    return render(request, "charts/publications/number_of_publications.html", return_dict)
def test_skip_keys_coerce_classmethod():
    class Person(Base):
        id: int
        name: str
        created_date: str

        @classmethod
        def parse(cls, item):
            assert cls is d, "class mismatch!"
            return d(**item)

        @classmethod
        def check_class(cls, expected_clss):
            return instruct.public_class(cls, preserve_subtraction=True) is expected_clss

    d = Person

    class Position(Base):
        id: int
        supervisor: Tuple[Person, ...]
        worker: Person
        task_name: str

        __coerce__ = {
            "supervisor": (List[Dict[str, Union[int, str]]], Person.from_many_json),
            "worker": (Dict[str, Union[int, str]], Person.parse),
        }

        @property
        def some_prop(self):
            if "task_name" in type(self):
                return self.task_name
            return None

        @classmethod
        def convert_to_person(cls, item):
            if not isinstance(item, Person):
                item = Person.from_json(item)
            return item

    p = Position.from_json({"id": 1, "task_name": "Business Partnerships"})
    p.supervisor = [{"created_date": "0", "id": 2, "name": "John"}]
    p.worker = {"created_date": "0", "id": 456, "name": "Sam"}

    FacelessPosition = Position & {"id": None, "supervisor": {"id"}, "worker": {"id"}}
    FacelessPerson = Person & {"id"}
    assert FacelessPerson is FacelessPosition._slots["worker"]

    fp = FacelessPosition.from_json({"id": 1, "task_name": "Business Partnerships"})
    fp.supervisor = [{"created_date": "0", "id": 2, "name": "John"}]
    assert fp.supervisor[0].name is None
    assert fp.supervisor[0].id == 2
    fp.worker = {"created_date": "0", "id": 456, "name": "Sam"}
    assert len(fp.worker) == 1
    assert "name" not in fp.worker
    assert isinstance(fp.worker, FacelessPerson)
    assert fp.to_json() == {"id": 1, "supervisor": [{"id": 2}], "worker": {"id": 456}}
    assert (
        FacelessPosition.__coerce__["worker"][1]({"created_date": "0", "id": 456, "name": "Sam"})
        == fp.worker
    )
    assert fp.some_prop is None
    assert d is Person
    assert fp.worker.check_class(FacelessPerson)

    # This makes use of replace_class_references:
    # We will replace Person in the convert_to_person function with the FacelessPerson
    # class reference.
    bart = FacelessPosition.convert_to_person({"created_date": "0", "id": 912, "name": "Bart"})
    assert isinstance(bart, FacelessPerson)

    p = Position.from_json({"id": 1, "task_name": "Business Partnerships"})
    p.supervisor = [{"created_date": "0", "id": 2, "name": "John"}]
    p.worker = {"created_date": "0", "id": 456, "name": "Sam"}
    assert Person.to_json(p) == {
        "id": 1,
        "supervisor": [{"created_date": "0", "id": 2, "name": "John"}],
        "task_name": "Business Partnerships",
        "worker": {"created_date": "0", "id": 456, "name": "Sam"},
    }

    with pytest.raises(instruct.exceptions.ClassCreationFailed) as exc:
        FacelessPosition(farts=1)
    # ensure the human friendly name is in the class error.
    assert exc.match(inflection.titleize(instruct.public_class(FacelessPosition).__name__))
def verbose_name(value):
    return titleize(value.__class__.__name__)
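# titleize() also splits CamelCase, so for a hypothetical model class the
# helper above yields a spaced display name:
import inflection

class UserProfile:  # made-up class, for illustration only
    pass

assert inflection.titleize(UserProfile().__class__.__name__) == 'User Profile'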
# Other project solution of Building a Web Scraper in Python. This one actually works though lol.
import requests
from bs4 import BeautifulSoup
from inflection import titleize


def title_generator(links):
    titles = []

    def post_formatter(url):
        if 'posts' in url:
            url = url.split('/')[-1]
            url = url.replace('-', ' ')
            url = titleize(url)
            titles.append(url)

    for link in links:
        if link.get('href') is None:
            continue
        else:
            post_formatter(link.get("href"))

    return titles


r = requests.get('http://www.dailysmarty.com/topics/python')
soup = BeautifulSoup(r.text, 'html.parser')
def from_node(cls, node: "GenericNode") -> "Persona":
    return cls(
        index=node.index,
        text=node.match_text,
        name=titleize(node.match_text),
    )
async def fetch_id(session, lock, mgpid):
    url = 'https://www.genealogy.math.ndsu.nodak.edu/id.php?id={:d}'.format(mgpid)
    async with lock, session.get(url) as response:
        text = await response.text()
    tree = html.fromstring(text)
    logging.debug('fetched %d', mgpid)
    if tree.text == 'You have specified an ID that does not exist in the database. Please back up and try again.':
        return None
    name = n(tree.cssselect('#mainContent h2')[0].text)
    math_scinet = tree.cssselect('#mainContent [href*="www.ams.org/mathscinet/MRAuthorID/"]')
    msn_id = int(math_scinet[0].attrib['href'].split('/')[-1]) if math_scinet else None
    descendants = [
        int(d.attrib['href'].split('=')[1])
        for d in tree.cssselect('#mainContent table tr td:first-child a')
    ]
    titles = [e.getparent() for e in tree.cssselect('#mainContent #thesisTitle')]
    degrees = []
    if titles:
        children = titles[0].getparent().getchildren()
        for title in titles:
            start = children.index(title) - 1
            after = children[start:]
            end = next(i for i, e in enumerate(after) if e.tag == 'p')
            degree, dissert, *classif, aelem = after[:end + 1]
            span, *imgs = degree.getchildren()
            inner, = span.getchildren()
            university = n(inner.text or '')
            year = n(inner.tail)
            degree = n(span.text)
            country = [inflection.titleize(i.attrib['title']) for i in imgs]
            diss = n(dissert.cssselect('#thesisTitle')[0].text)
            if classif:
                class_id, classification = classif[0].text[36:-1].split('—')
                class_id = int(class_id)
            else:
                class_id = None
                classification = ''
            alinks = aelem.cssselect('a')
            aids = (int(a.attrib['href'].split('=')[1]) for a in alinks)
            raw_tags = itertools.chain([aelem.text], (a.getnext().tail for a in alinks[:-1]))
            tags = (n(t).rstrip(':') for t in raw_tags)
            advisors = list(zip(tags, aids))
            degrees.append({
                'degree': degree,
                'university': university,
                'years': year,
                'country': country,
                'dissertation': diss,
                'subject_id': class_id,
                'subject': classification,
                'advisors': advisors,
            })
    return {
        'name': name,
        'id': mgpid,
        'msn_id': msn_id,
        'descendants': descendants,
        'degrees': degrees,
    }
    'data_transfer_error': ['fgs', 'nircam', 'niriss', 'nirspec'],
    'ghost': ['fgs', 'nircam', 'niriss', 'nirspec'],
    'snowball': ['fgs', 'nircam', 'niriss', 'nirspec'],

    # instrument-specific anomalies:
    'column_pull_up': ['miri'],
    'dominant_msa_leakage': ['nirspec'],
    'dragons_breath': ['nircam'],
    'glow': ['miri'],
    'internal_reflection': ['miri'],
    'optical_short': ['nirspec'],  # Only for MOS observations
    'row_pull_down': ['miri'],

    # additional anomalies:
    'other': ['fgs', 'miri', 'nircam', 'niriss', 'nirspec']}

# Defines the possible anomalies to flag through the web app
ANOMALY_CHOICES = [(anomaly, inflection.titleize(anomaly)) if anomaly != "dominant_msa_leakage"
                   else (anomaly, "Dominant MSA Leakage")
                   for anomaly in ANOMALIES_PER_INSTRUMENT]

ANOMALY_CHOICES_FGS = [(anomaly, inflection.titleize(anomaly)) for anomaly in ANOMALIES_PER_INSTRUMENT
                       if 'fgs' in ANOMALIES_PER_INSTRUMENT[anomaly]]

ANOMALY_CHOICES_MIRI = [(anomaly, inflection.titleize(anomaly)) for anomaly in ANOMALIES_PER_INSTRUMENT
                        if 'miri' in ANOMALIES_PER_INSTRUMENT[anomaly]]

ANOMALY_CHOICES_NIRCAM = [(anomaly, inflection.titleize(anomaly)) for anomaly in ANOMALIES_PER_INSTRUMENT
                          if 'nircam' in ANOMALIES_PER_INSTRUMENT[anomaly]]

ANOMALY_CHOICES_NIRISS = [(anomaly, inflection.titleize(anomaly)) for anomaly in ANOMALIES_PER_INSTRUMENT
                          if 'niriss' in ANOMALIES_PER_INSTRUMENT[anomaly]]
def name(self):
    return titleize(self.__class__.__name__)
    # instrument-specific anomalies:
    'column_pull_up': ['miri'],
    'dominant_msa_leakage': ['nirspec'],
    'dragons_breath': ['nircam'],
    'glow': ['miri'],
    'internal_reflection': ['miri'],
    'optical_short': ['nirspec'],  # Only for MOS observations
    'row_pull_down': ['miri'],

    # additional anomalies:
    'other': ['fgs', 'miri', 'nircam', 'niriss', 'nirspec']
}

# Defines the possible anomalies to flag through the web app
ANOMALY_CHOICES = [
    (anomaly, inflection.titleize(anomaly)) if anomaly != "dominant_msa_leakage"
    else (anomaly, "Dominant MSA Leakage")
    for anomaly in ANOMALIES_PER_INSTRUMENT
]

ANOMALY_CHOICES_FGS = [(anomaly, inflection.titleize(anomaly))
                       for anomaly in ANOMALIES_PER_INSTRUMENT
                       if 'fgs' in ANOMALIES_PER_INSTRUMENT[anomaly]]

ANOMALY_CHOICES_MIRI = [(anomaly, inflection.titleize(anomaly))
                        for anomaly in ANOMALIES_PER_INSTRUMENT
                        if 'miri' in ANOMALIES_PER_INSTRUMENT[anomaly]]

ANOMALY_CHOICES_NIRCAM = [(anomaly, inflection.titleize(anomaly))
                          for anomaly in ANOMALIES_PER_INSTRUMENT
                          if 'nircam' in ANOMALIES_PER_INSTRUMENT[anomaly]]
def get_package_dict(self, iso_values, harvest_object):
    '''
    '''
    package_dict = SpatialHarvester.get_package_dict(self, iso_values, harvest_object)

    simple_keys = {
        'publisher_info',
        'resource-provider',
        'distributor-info',
        'aggregation-info',
        'distributor-formats',
        'additional-information-source',
        'purpose',
        # Constraints
        'use-constraints',
        'access-constraints',
        'fees',
        # lineage
        'lineage',
        'lineage-process-steps',
    }
    extras = {k: iso_values.get(k) for k in simple_keys if k in iso_values}

    keywords = defaultdict(list)
    for keyword in iso_values['keywords']:
        keyword_type = keyword['type'] or 'keywords'
        keywords[keyword_type].append(keyword)

    extras['grouped_keywords'] = []
    for keyword_type in [
            'theme', 'dataCenter', 'platform', 'instrument', 'place',
            'project', 'dataResolution', 'stratum', 'otherRestrictions',
            'keywords'
    ]:
        if keyword_type in keywords:
            extras['grouped_keywords'].append(
                [titleize(keyword_type), keywords[keyword_type]])

    if iso_values.get('publisher', None):
        extras['publisher'] = iso_values.get('publisher', [])

    if iso_values.get('browse-graphic', None):
        browse_graphic = iso_values['browse-graphic'][0]['file']
        extras['browse-graphic'] = browse_graphic

    if iso_values.get('dataset-edition'):
        extras['dataset-edition'] = iso_values['dataset-edition']
        package_dict["version"] = iso_values['dataset-edition'][0]

    if iso_values.get('presentation-form'):
        extras['presentation-form'] = iso_values['presentation-form'][0]

    if iso_values.get('responsible-organisation'):
        log.info("Checking for responsible-organisation")
        extras['responsible-organisation'] = iso_values.get('responsible-organisation', [])

    if iso_values.get('responsible-parties'):
        extras['responsible-parties'] = self.unique_responsible_parties(
            iso_values.get('responsible-organisation', []))

    for item in harvest_object.extras:
        key = item.key
        value = item.value
        if key == u'waf_location':
            extras['waf_location'] = value
            break
    else:
        extras['waf_location'] = None

    extras['object_reference'] = harvest_object.id

    extras_kv = [{
        'key': k,
        'value': json.dumps(v) if isinstance(v, (list, dict)) else v
    } for k, v in extras.iteritems()]

    package_dict['extras'] = package_dict['extras'] + extras_kv
    package_dict['resources'] = self.filter_duplicate_resources(package_dict)
    package_dict['resources'] = self.reorder_resources(package_dict)
    package_dict = self.update_resources(package_dict)

    return package_dict
def project_index(request, tag_slug=None, status_slug=None, project_type_slug=None, query_string=None):
    tag = None
    status = None
    start_date = None
    start_range = None
    end_date = None
    end_range = None
    project_type = None
    form_project_types = None
    form_project_status = None
    form_tags = None
    form_funds_range = None
    form_from_total_funds = None
    form_to_total_funds = None
    form_participants_name = {}
    form_participants_role = {}
    clean_index = False

    request.session['max_year'] = MAX_YEAR_LIMIT
    request.session['min_year'] = MIN_YEAR_LIMIT

    if tag_slug:
        tag = get_object_or_404(Tag, slug=tag_slug)
        project_ids = ProjectTag.objects.filter(tag=tag).values('project_id')
        projects = Project.objects.filter(id__in=project_ids)

    if status_slug:
        status = status_slug.replace('-', ' ').capitalize()
        projects = Project.objects.filter(status=status)

    if project_type_slug:
        project_type = titleize(project_type_slug).capitalize()
        projects = Project.objects.filter(project_type=project_type)

    if not tag_slug and not status_slug and not project_type_slug:
        clean_index = True
        projects = Project.objects.all()

    projects = projects.order_by('-start_year', '-end_year', 'full_name')

    if request.method == 'POST':
        form_member_field_count = request.POST.get('member_field_count')
        form = ProjectSearchForm(request.POST, extra=form_member_field_count)
        if form.is_valid():
            query_string = form.cleaned_data['text']
            start_date = form.cleaned_data['start_date']
            start_range = form.cleaned_data['start_range']
            end_date = form.cleaned_data['end_date']
            end_range = form.cleaned_data['end_range']
            form_project_types = form.cleaned_data['project_types']
            form_project_status = form.cleaned_data['status']
            form_from_total_funds = form.cleaned_data['from_total_funds']
            form_to_total_funds = form.cleaned_data['to_total_funds']
            form_funds_range = form.cleaned_data['funds_range']
            form_tags = form.cleaned_data['tags']

            for my_tuple in form.fields.items():
                if my_tuple[0].startswith('participant_name_'):
                    form_names = form.cleaned_data[my_tuple[0]]
                    if form_names:
                        form_participants_name[my_tuple[0][-1:]] = form_names
                elif my_tuple[0].startswith('participant_role_'):
                    form_roles = form.cleaned_data[my_tuple[0]]
                    if form_roles:
                        form_participants_role[my_tuple[0][-1:]] = list(form_roles.values())

            # Apply the filters and return the filtered 'projects' to the index
            if start_date:
                month_year = start_date.split('/')
                if start_range == '<=':
                    projects = projects.filter(
                        Q(start_year__lt=month_year[1]) |
                        (Q(start_year=month_year[1]) & Q(start_month__lte=month_year[0])))
                elif start_range == '<':
                    projects = projects.filter(
                        Q(start_year__lt=month_year[1]) |
                        (Q(start_year=month_year[1]) & Q(start_month__lt=month_year[0])))
                elif start_range == '>=':
                    projects = projects.filter(
                        Q(start_year__gt=month_year[1]) |
                        (Q(start_year=month_year[1]) & Q(start_month__gte=month_year[0])))
                elif start_range == '>':
                    projects = projects.filter(
                        Q(start_year__gt=month_year[1]) |
                        (Q(start_year=month_year[1]) & Q(start_month__gt=month_year[0])))
                elif start_range == '==':
                    projects = projects.filter(
                        Q(start_year=month_year[1]) & Q(start_month=month_year[0]))

            if end_date:
                month_year = end_date.split('/')
                if end_range == '<=':
                    projects = projects.filter(
                        Q(end_year__lt=month_year[1]) |
                        (Q(end_year=month_year[1]) & Q(end_month__lte=month_year[0])))
                elif end_range == '<':
                    projects = projects.filter(
                        Q(end_year__lt=month_year[1]) |
                        (Q(end_year=month_year[1]) & Q(end_month__lt=month_year[0])))
                elif end_range == '>=':
                    projects = projects.filter(
                        Q(end_year__gt=month_year[1]) |
                        (Q(end_year=month_year[1]) & Q(end_month__gte=month_year[0])))
                elif end_range == '>':
                    projects = projects.filter(
                        Q(end_year__gt=month_year[1]) |
                        (Q(end_year=month_year[1]) & Q(end_month__gt=month_year[0])))
                elif end_range == '==':
                    projects = projects.filter(
                        Q(end_year=month_year[1]) & Q(end_month=month_year[0]))

            if form_project_types:
                projects = projects.filter(project_type__in=form_project_types)

            if form_project_status:
                projects = projects.filter(status__in=form_project_status)

            if form_tags:
                projects = projects.filter(projecttag__tag__name__in=form_tags)

            if form_from_total_funds:
                if form_funds_range == '==':
                    funding_sum = FundingAmount.objects.all().values('funding_id').annotate(Sum('own_amount'))
                    filtered_funding_ids = funding_sum.filter(
                        own_amount__sum=form_from_total_funds).values_list('funding_id', flat=True)
                    projects = projects.filter(funding__id__in=filtered_funding_ids)
                elif form_funds_range == '<':
                    funding_sum = FundingAmount.objects.all().values('funding_id').annotate(Sum('own_amount'))
                    filtered_funding_ids = funding_sum.filter(
                        own_amount__sum__lt=form_from_total_funds).values_list('funding_id', flat=True)
                    projects = projects.filter(funding__id__in=filtered_funding_ids)
                elif form_funds_range == '<=':
                    funding_sum = FundingAmount.objects.all().values('funding_id').annotate(Sum('own_amount'))
                    filtered_funding_ids = funding_sum.filter(
                        own_amount__sum__lte=form_from_total_funds).values_list('funding_id', flat=True)
                    projects = projects.filter(funding__id__in=filtered_funding_ids)
                elif form_funds_range == '>':
                    funding_sum = FundingAmount.objects.all().values('funding_id').annotate(Sum('own_amount'))
                    filtered_funding_ids = funding_sum.filter(
                        own_amount__sum__gt=form_from_total_funds).values_list('funding_id', flat=True)
                    projects = projects.filter(funding__id__in=filtered_funding_ids)
                elif form_funds_range == '>=':
                    funding_sum = FundingAmount.objects.all().values('funding_id').annotate(Sum('own_amount'))
                    filtered_funding_ids = funding_sum.filter(
                        own_amount__sum__gte=form_from_total_funds).values_list('funding_id', flat=True)
                    projects = projects.filter(funding__id__in=filtered_funding_ids)
                elif form_funds_range == '-':
                    funding_sum = FundingAmount.objects.all().values('funding_id').annotate(Sum('own_amount'))
                    filtered_funding_ids = funding_sum.filter(
                        own_amount__sum__gte=form_from_total_funds)
                    if form_to_total_funds:
                        filtered_funding_ids = filtered_funding_ids.filter(
                            own_amount__sum__lte=form_to_total_funds)
                    filtered_funding_ids = filtered_funding_ids.values_list('funding_id', flat=True)
                    projects = projects.filter(funding__id__in=filtered_funding_ids)

            found = True
            if form_participants_name:
                group_projects = []
                for key, name in form_participants_name.iteritems():
                    person_id = Person.objects.filter(
                        slug__contains=slugify(name)).values_list('id', flat=True)
                    if person_id and found:
                        person_projects_set = set()
                        for _id in person_id:
                            participant_roles_ids = []
                            if key in form_participants_role:
                                for role in form_participants_role[key]:
                                    participant_roles_ids.append(role['id'])
                            if participant_roles_ids:
                                person_projects = AssignedPerson.objects.all().filter(
                                    Q(person_id=_id) & Q(role__in=participant_roles_ids)
                                ).values_list('project_id', flat=True)
                                if person_projects:
                                    person_projects_set.update(person_projects)
                            else:
                                person_projects = AssignedPerson.objects.all().filter(
                                    person_id=_id).values_list('project_id', flat=True)
                                if person_projects:
                                    person_projects_set.update(person_projects)
                        group_projects.append(person_projects_set)
                    else:
                        found = False
                if group_projects and found:
                    projects = projects.filter(
                        id__in=list(set.intersection(*group_projects)))

            query = slugify(query_string)
            projs = []
            person_ids = Person.objects.filter(slug__contains=query).values('id')
            project_ids = AssignedPerson.objects.filter(
                person_id__in=person_ids).values('project_id')
            project_ids = set([x['project_id'] for x in project_ids])
            for project in projects:
                if (query in slugify(project.full_name)) or (project.id in project_ids):
                    projs.append(project)
            projects = projs

            if not found:
                projects = []

            session_filter_dict = {
                'form_start_date': start_date,
                'form_start_range': start_range,
                'form_end_date': end_date,
                'form_end_range': end_range,
                'form_project_types': form_project_types,
                'form_project_status': form_project_status,
                'form_tags': form_tags,
                'projects': serializers.serialize('json', projects),
                'form_funds_range': form_funds_range,
                'form_from_total_funds': str(form_from_total_funds),
                'form_to_total_funds': str(form_to_total_funds),
                'form_participants_name': form_participants_name,
                'form_participants_role': json.dumps(form_participants_role),
                'form_member_field_count': len(form_participants_name),
                'query_string': query_string,
            }
            request.session['filtered'] = session_filter_dict
            return HttpResponseRedirect(reverse('filtered_project_query'))
    else:
        if 'filtered' in request.session.keys():
            p = re.compile(ur'projects\/filtered(\/\?page=[1-9]+)?')
            if re.search(p, request.path) is None:
                del request.session['filtered']
                form = ProjectSearchForm(extra=1)
            else:
                member_field_count = request.session['filtered']['form_member_field_count']
                if member_field_count == 0:
                    member_field_count = 1
                form = ProjectSearchForm(extra=member_field_count)
                start_date = request.session['filtered']['form_start_date']
                start_range = request.session['filtered']['form_start_range']
                end_date = request.session['filtered']['form_end_date']
                end_range = request.session['filtered']['form_end_range']
                form_project_types = request.session['filtered']['form_project_types']
                form_project_status = request.session['filtered']['form_project_status']
                form_tags = request.session['filtered']['form_tags']
                projects = []
                for deserialized_object in serializers.deserialize(
                        'json', request.session['filtered']['projects']):
                    projects.append(deserialized_object.object)
                form_funds_range = request.session['filtered']['form_funds_range']
                form_from_total_funds = request.session['filtered']['form_from_total_funds']
                form_to_total_funds = request.session['filtered']['form_to_total_funds']
                form_participants_name = request.session['filtered']['form_participants_name']
                form_participants_role = json.loads(
                    request.session['filtered']['form_participants_role'])
                query_string = request.session['filtered']['query_string']
                clean_index = False
        else:
            form = ProjectSearchForm(extra=1)

    if query_string:
        query = slugify(query_string)
        projs = []
        person_ids = Person.objects.filter(slug__contains=query).values('id')
        project_ids = AssignedPerson.objects.filter(
            person_id__in=person_ids).values('project_id')
        project_ids = set([x['project_id'] for x in project_ids])
        for project in projects:
            if (query in slugify(project.full_name)) or (project.id in project_ids):
                projs.append(project)
        projects = projs
        clean_index = False

    projects_length = len(projects)

    project_model_list = ['Project']
    last_entry = get_last_model_update_log_entry('projects', project_model_list)

    project_types = Project.objects.all().values_list('project_type', flat=True)
    counter = Counter(project_types)
    ord_dict = OrderedDict(sorted(counter.items(), key=lambda t: t[1]))
    items = ord_dict.items()

    status_info = Project.objects.all().values_list('status', flat=True)
    status_items = OrderedDict(
        sorted(Counter(status_info).items(), key=lambda t: t[1])).items()

    tags_id_info = Project.objects.all().values_list('tags', flat=True)
    tags_info = Tag.objects.filter(
        id__in=tags_id_info).order_by('name').values_list('name', flat=True)

    roles_id = AssignedPerson.objects.all().distinct().values_list('role', flat=True)
    roles = Role.objects.filter(id__in=roles_id).order_by('name')

    # Retrieves all the full names of authors.
    participants_info = AssignedPerson.objects.all() \
        .distinct('person__full_name').order_by() \
        .values_list('person__full_name', flat=True)

    # dictionary to be returned in render(request, )
    return_dict = {
        'clean_index': clean_index,
        'form': form,
        'last_entry': last_entry,
        'project_type': project_type,
        'project_type_info': dict(items),
        'project_status_info': dict(status_items),
        'project_tags_info': tags_info,
        'projects': projects,
        'projects_length': projects_length,
        'query_string': query_string,
        'status': status,
        'tag': tag,
        'roles': roles,
        'form_start_date': start_date,
        'form_start_range': start_range,
        'form_end_date': end_date,
        'form_end_range': end_range,
        'form_project_types': form_project_types,
        'form_project_status': form_project_status,
        'form_tags': form_tags,
        'form_funds_range': form_funds_range,
        'form_from_total_funds': form_from_total_funds,
        'form_to_total_funds': form_to_total_funds,
        'form_participants_name': form_participants_name,
        'form_participants_role': form_participants_role,
        'participants_info': participants_info,
        'web_title': u'Projects',
    }

    return render(request, "projects/index.html", return_dict)
def project_index(request, tag_slug=None, status_slug=None,
                  project_type_slug=None, query_string=None):
    tag = None
    status = None
    project_type = None
    clean_index = False

    if tag_slug:
        tag = get_object_or_404(Tag, slug=tag_slug)
        project_ids = ProjectTag.objects.filter(tag=tag).values('project_id')
        projects = Project.objects.filter(id__in=project_ids)

    if status_slug:
        status = status_slug.replace('-', ' ').capitalize()
        projects = Project.objects.filter(status=status)

    if project_type_slug:
        project_type = titleize(project_type_slug).capitalize()
        projects = Project.objects.filter(project_type=project_type)

    if not tag_slug and not status_slug and not project_type_slug:
        clean_index = True
        projects = Project.objects.all()

    projects = projects.order_by('-start_year', '-end_year', 'full_name')

    if request.method == 'POST':
        form = ProjectSearchForm(request.POST)
        if form.is_valid():
            query_string = form.cleaned_data['text']
            return HttpResponseRedirect(
                reverse('view_project_query',
                        kwargs={'query_string': query_string}))
    else:
        form = ProjectSearchForm()

    if query_string:
        query = slugify(query_string)
        projs = []
        person_ids = Person.objects.filter(slug__contains=query).values('id')
        project_ids = AssignedPerson.objects.filter(
            person_id__in=person_ids).values('project_id')
        project_ids = set([x['project_id'] for x in project_ids])
        for project in projects:
            if (query in slugify(project.full_name)) or (project.id in project_ids):
                projs.append(project)
        projects = projs
        clean_index = False

    projects_length = len(projects)
    last_created = Project.objects.order_by('-log_created')[0]
    last_modified = Project.objects.order_by('-log_modified')[0]

    project_types = Project.objects.all().values_list('project_type', flat=True)
    counter = Counter(project_types)
    ord_dict = OrderedDict(sorted(counter.items(), key=lambda t: t[1]))
    items = ord_dict.items()

    # dictionary to be returned in render_to_response()
    return_dict = {
        'web_title': u'Projects',
        'clean_index': clean_index,
        'form': form,
        'last_created': last_created,
        'last_modified': last_modified,
        'project_type': project_type,
        'project_type_info': dict(items),
        'projects': projects,
        'projects_length': projects_length,
        'query_string': query_string,
        'status': status,
        'tag': tag,
    }
    return render_to_response("projects/index.html", return_dict,
                              context_instance=RequestContext(request))
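# Both index views compare slugs instead of raw strings, which makes the
# participant search case- and accent-insensitive. A quick illustration,
# assuming these views use Django's slugify (the names are made up):
from django.template.defaultfilters import slugify

query = slugify(u'José Pérez')               # u'jose-perez'
name = slugify(u'Research with Jose Perez')  # u'research-with-jose-perez'
print(query in name)                         # True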
# © 2019, Drew Goodman, all rights reserved
import requests
from inflection import titleize
from bs4 import BeautifulSoup

url = requests.get("https://www.dailysmarty.com/topics/python")
html_text = BeautifulSoup(url.text, 'html.parser')
# print(html_text.prettify())

# str.lstrip() strips a *set of characters*, not a prefix, so the original
# lstrip("DailySmarty | ") / lstrip("/posts/") calls could also eat leading
# letters of the title or slug itself; remove the known prefixes instead.
page_title = html_text.title.string.replace("DailySmarty | ", "", 1)
print(f"\n <<< {page_title.upper()}: >>> \n")

for link in html_text.find_all('a'):
    href = link.get('href')
    if href and "posts/" in href:
        slug = href.split("/posts/")[-1]
        print(f' - "{titleize(slug)}"')
def parse(self):
    """
    Open and parse the ``Distelec.txt`` file.

    After parsing the following attributes will be available:

    :var provinces: Dictionary with province id as key and name as value.
    :var cantons: Dictionary with a tuple ``(province id, canton id)``
     as key and name as value.
    :var districts: Dictionary with a tuple
     ``(province id, canton id, district id)`` as key and name as value.
    """
    with open_with_encoding(self._filename, 'rb', 'iso8859-15') as fd:
        for linenum, line in enumerate(fd, 1):
            line = line.strip()
            if not line:
                log.warning(
                    'Distelec.txt :: Ignoring empty line #{}'.format(
                        linenum))
                continue

            try:
                parts = line.split(',')
                assert len(parts) == 4

                # Get codes
                code = int(parts[0])

                # Insert province
                province_code = code // 100000
                province_name = titleize(parts[1].strip())
                if province_code in self.provinces:
                    assert self.provinces[province_code] == province_name
                else:
                    self.provinces[province_code] = province_name

                # Insert canton. Membership must be tested against the
                # composite key; the original checked the bare canton code,
                # which is never a key of self.cantons.
                canton_code = (code % 100000) // 1000
                canton_key = (province_code, canton_code)
                canton_name = titleize(parts[2].strip())
                if canton_key in self.cantons:
                    assert self.cantons[canton_key] == canton_name
                else:
                    self.cantons[canton_key] = canton_name

                # Insert district (same composite-key fix as above)
                district_code = code % 1000
                district_key = (province_code, canton_code, district_code)
                district_name = titleize(parts[3].strip())
                if district_key in self.districts:
                    assert self.districts[district_key] == district_name
                else:
                    self.districts[district_key] = district_name

            except Exception:
                self._bad_data.append(linenum)
                log.error(
                    'Distelec.txt :: Bad data at line #{}:\n{}'.format(
                        linenum, line))
                log.debug(format_exc())
                continue
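# Distelec codes pack province, canton, and district into a single integer;
# a worked example of the decomposition above (the code value is made up):
code = 302015
province_code = code // 100000         # 3
canton_code = (code % 100000) // 1000  # 2
district_code = code % 1000            # 15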
def __get_job_data(member):
    company = None
    first_job = None
    last_job = None
    position = None

    organizations = Organization.objects.filter(id__in=UNIT_ORGANIZATION_IDS)
    try:
        jobs = Job.objects.filter(
            person_id=member.id,
            organization_id__in=organizations).order_by('end_date')
        first_job = jobs[0]
        last_job = jobs.reverse()[0]
        organization = Organization.objects.get(id=last_job.organization_id)
        company = organization.short_name
        position = last_job.position
    except (IndexError, Organization.DoesNotExist):
        # The member has no job in any of the unit's organizations;
        # a bare `except:` here would also have swallowed real errors.
        pass

    pr_role = Role.objects.get(slug='principal-researcher')
    project_ids = AssignedPerson.objects.filter(
        person_id=member.id).exclude(role_id=pr_role.id).values('project_id')
    publication_ids = PublicationAuthor.objects.filter(
        author=member.id).values_list('publication_id', flat=True)

    accounts = []
    account_profiles = AccountProfile.objects.filter(
        person_id=member.id).order_by('network__name')
    for account_profile in account_profiles:
        network = Network.objects.get(id=account_profile.network_id)
        account_item = {
            'base_url': network.base_url,
            'icon_url': network.icon,
            'network_name': network.name,
            'profile_id': account_profile.profile_id,
        }
        accounts.append(account_item)

    publication_types = []
    for publication_type in member.publications.all().values_list(
            'child_type', flat=True):
        publication_types.append(titleize(publication_type).lower())
    counter = Counter(publication_types)
    ord_dict = OrderedDict(sorted(counter.items(), key=lambda t: t[1]))
    items = ord_dict.items()

    try:
        Thesis.objects.get(author_id=member.id)
        has_thesis = True
    except Thesis.DoesNotExist:
        has_thesis = False

    return {
        'accounts': accounts,
        'company': company,
        'first_job': first_job,
        'has_thesis': has_thesis,
        'last_job': last_job,
        'number_of_projects': len(project_ids),
        'number_of_publications': len(publication_ids),
        'position': position,
        'pubtype_info': dict(items),
    }
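# The Counter-into-OrderedDict idiom above recurs throughout these views:
# count category labels, then keep them sorted by ascending frequency.
# In isolation (the sample labels are made up):
from collections import Counter, OrderedDict

labels = ['book', 'journal article', 'book', 'conference paper', 'book']
ranked = OrderedDict(sorted(Counter(labels).items(), key=lambda t: t[1]))
# -> OrderedDict([('journal article', 1), ('conference paper', 1), ('book', 3)])
# (order between equal counts may vary by Python version)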
def convert_name(self, name):
    return titleize(name.replace("HTTP_", "")).replace(" ", "-")
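# convert_name() turns WSGI-style environ keys back into HTTP header names.
# Assuming an instance `h` of the class defining it:
h.convert_name("HTTP_USER_AGENT")    # -> "User-Agent"
h.convert_name("HTTP_CONTENT_TYPE")  # -> "Content-Type"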
def convert(name):
    return inflection.titleize(inflection.underscore(name))
def humanize(s):
    # Note: despite the name, this delegates to inflection.titleize(),
    # which capitalizes every word, not just the first one.
    return inflection.titleize(s)
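# For reference, the inflection helpers wrapped by the two functions above
# differ mainly in capitalization; underscore() also normalizes CamelCase:
import inflection

inflection.humanize('employee_salary')  # 'Employee salary'
inflection.titleize('employee_salary')  # 'Employee Salary'
convert('DeviceTypeName')               # 'Device Type Name'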
def member_publications(request, person_slug, publication_type_slug=None):
    person_status = __determine_person_status(person_slug)

    # Redirect to the correct URL template if concordance doesn't exist
    if (person_status == MEMBER) and ('/' + MEMBER not in request.path):
        return HttpResponseRedirect(
            reverse('member_publications',
                    kwargs={'person_slug': person_slug}))

    if (person_status == FORMER_MEMBER) and ('/' + FORMER_MEMBER not in request.path):
        return HttpResponseRedirect(
            reverse('former_member_publications',
                    kwargs={'person_slug': person_slug}))

    member = get_object_or_404(Person, slug=person_slug)

    JCR_TITLE = 'JCR indexed journal article'

    publications = OrderedDict()
    publications[JCR_TITLE] = []
    publications['conference-paper'] = []
    publications['book'] = []
    publications['book-section'] = []
    publications['journal-article'] = []
    publications['magazine-article'] = []

    if publication_type_slug:
        publication_type = titleize(publication_type_slug).replace(' ', '')
        publication_ids = member.publications.filter(
            child_type=publication_type).values_list('id', flat=True)
        if not publication_ids:
            raise Http404
    else:
        publication_ids = member.publications.all().values_list('id', flat=True)

    publication_items = Publication.objects.select_related(
        'conferencepaper',
        'conferencepaper__parent_proceedings',
        'booksection',
        'booksection__parent_book',
        'journalarticle',
        'journalarticle__parent_journal',
        'magazinearticle',
        'magazinearticle__parent_magazine',
    ).prefetch_related('publicationauthor_set__author').filter(
        id__in=publication_ids).order_by('-published', 'title')

    has_publications = True if publication_ids else False

    for publication_item in publication_items:
        title = publication_item.title
        slug = publication_item.slug
        bibtex = publication_item.bibtex
        year = publication_item.year
        pdf = publication_item.pdf if publication_item.pdf else None

        publication_authors = publication_item.publicationauthor_set.all()
        # key= works on both Python 2 and 3; the old cmp-style comparator
        # was Python 2 only.
        sorted_publication_authors = sorted(
            publication_authors, key=lambda x: x.position)
        author_list = [
            pubauthor.author.full_name
            for pubauthor in sorted_publication_authors
        ]
        authors = ', '.join(author_list)

        parent_title = None
        impact_factor = None

        if publication_item.child_type == 'ConferencePaper':
            conference_paper = publication_item.conferencepaper
            parent_title = conference_paper.parent_proceedings.title if conference_paper.parent_proceedings else ''
        elif publication_item.child_type == 'BookSection':
            book_section = publication_item.booksection
            parent_title = book_section.parent_book.title if book_section.parent_book else ''
        elif publication_item.child_type == 'JournalArticle':
            journal_article = publication_item.journalarticle
            parent_title = journal_article.parent_journal.title if journal_article.parent_journal else ''
            impact_factor = journal_article.impact_factor
        elif publication_item.child_type == 'MagazineArticle':
            magazine_article = publication_item.magazinearticle
            parent_title = magazine_article.parent_magazine.title if magazine_article.parent_magazine else ''

        publication_dict = {
            'title': title,
            'slug': slug,
            'bibtex': bibtex,
            'year': year,
            'pdf': pdf,
            'authors': authors,
            'parent_title': parent_title,
            'impact_factor': impact_factor,
        }

        if publication_item.child_type == 'JournalArticle':
            if journal_article.impact_factor is not None:
                child_type = JCR_TITLE
            else:
                child_type = 'JournalArticle'
        else:
            child_type = publication_item.child_type

        if child_type not in publications:
            publications[child_type] = []
        publications[child_type].append(publication_dict)

    all_thesis = Thesis.objects.filter(author_id=member.id).all()

    return_dict = {
        'has_publications': has_publications,
        'inside_category': publication_type_slug is not None,
        'member': member,
        'publications': publications,
        'thesis': all_thesis,
        'web_title': u'%s - Publications' % member.full_name,
        'current_page': 'publications',
    }

    data_dict = __get_job_data(member)
    return_dict.update(data_dict)

    return render(request, "members/publications.html", return_dict)
def post_format(url):
    # Appends a prettified post title to the module-level `titles` list.
    if 'posts' in url:
        url = url.split('/')[-1]
        url = url.replace('-', ' ')
        url = titleize(url)
        titles.append(url)
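# Quick check of the transformation, assuming the module-level `titles`
# list the function appends to (the URL is made up):
titles = []
post_format("https://example.com/posts/intro-to-python-decorators")
print(titles)  # ['Intro To Python Decorators']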
def __get_job_data(member):
    company = None
    company_slug = None
    first_job = None
    last_job = None
    position = None

    organizations = Organization.objects.filter(id__in=UNIT_ORGANIZATION_IDS)
    try:
        jobs = Job.objects.filter(
            person_id=member.id,
            organization_id__in=organizations).order_by('end_date')
        first_job = jobs[0]
        last_job = jobs.reverse()[0]
        organization = Organization.objects.get(id=last_job.organization_id)
        company = organization.short_name
        company_slug = organization.slug
        position = last_job.position
    except (IndexError, Organization.DoesNotExist):
        # The member has no job in any of the unit's organizations.
        pass

    # Extracting datasets information
    dataset_ids = DatasetAuthor.objects.filter(author=member.id).values_list(
        'dataset_id', flat=True)

    project_ids = AssignedPerson.objects.filter(person_id=member.id)
    project_ids = project_ids.order_by('role__relevance_order').values(
        'project_id')

    publication_ids = PublicationAuthor.objects.filter(
        author=member.id).values_list('publication_id', flat=True)

    accounts = []
    account_profiles = AccountProfile.objects.filter(
        person_id=member.id).order_by('network__name')
    for account_profile in account_profiles:
        network = Network.objects.get(id=account_profile.network_id)
        account_item = {
            'base_url': network.base_url,
            'icon_url': network.icon,
            'network_name': network.name,
            'profile_id': account_profile.profile_id,
        }
        accounts.append(account_item)

    publication_types = []
    for publication_type in member.publications.all().values_list(
            'child_type', flat=True):
        publication_types.append(titleize(publication_type).lower())
    counter = Counter(publication_types)
    ord_dict = OrderedDict(sorted(counter.items(), key=lambda t: t[1]))
    items = ord_dict.items()

    assigned_persons = AssignedPerson.objects.filter(person=member)
    assigned_persons = assigned_persons.order_by(
        'role__relevance_order',
    ).values_list('role__name', flat=True)

    role_items = OrderedDict()
    for project_role in assigned_persons:
        role_items[project_role] = role_items.get(project_role, 0) + 1

    # .exists() avoids fetching full rows just to test for presence.
    has_thesis = Thesis.objects.filter(author_id=member.id).exists()
    has_awards = PersonRelatedToAward.objects.filter(
        person_id=member.id).exists()
    has_talks = PersonRelatedToTalkOrCourse.objects.filter(
        person_id=member.id).exists()
    has_contributions = PersonRelatedToContribution.objects.filter(
        person_id=member.id).exists()
    has_news = PersonRelatedToNews.objects.filter(person_id=member.id).exists()

    if has_awards and has_talks and has_contributions and has_news:
        header_rows = 2
    else:
        header_rows = 1

    display_bio = (has_awards or has_talks or has_contributions or has_news
                   or len(project_ids) > 0 or len(publication_ids) > 0
                   or len(dataset_ids) > 0)

    if has_thesis:
        number_of_publications = len(publication_ids) + 1
    else:
        number_of_publications = len(publication_ids)

    return {
        'accounts': accounts,
        'company': company,
        'company_slug': company_slug,
        'display_bio': display_bio,
        'first_job': first_job,
        'has_awards': has_awards,
        'has_contributions': has_contributions,
        'has_news': has_news,
        'has_talks': has_talks,
        'has_thesis': has_thesis,
        'header_rows': header_rows,
        'last_job': last_job,
        'number_of_projects': len(project_ids),
        'number_of_publications': number_of_publications,
        'number_of_datasets': len(dataset_ids),
        'position': position,
        'pubtype_info': dict(items),
        'role_items': role_items,
    }
def __finfo(self, ax, key):
    title = Inf.titleize(key)
    ax.set_title(title, fontsize=20)
    ax.set_xlabel("Candidate ID", fontsize=14)
    ax.set_ylabel("Slide ID", fontsize=14)
    ax.get_xaxis().tick_bottom()