コード例 #1
0
ファイル: Bonus.py プロジェクト: grapesmoker/packet_parser
    def __init__(self, leadin='', parts=None, answers=None, values=None, number='',
                 packet=None, tournament=None):
        """Build a bonus from its raw (HTML) leadin, parts and answers.

        The leadin is run through the module-level ``num_regex`` and
        ``tb_regex`` substitutions; when it starts with ``<strong>`` a leading
        "12." style number and a leading "TB." marker inside the tag are also
        removed.  Adjacent strong/em tag pairs in every answer are collapsed
        into ``<req>`` / ``</req>``, and sanitized copies of the leadin, parts
        and answers are stored next to the formatted ones.

        Args:
            leadin: leadin text, possibly wrapped in ``<strong>``.
            parts: list of bonus parts; a falsy value becomes a new empty list.
            answers: list of answers; a falsy value becomes a new empty list.
            values: list of part values; a falsy value becomes a new empty list.
            number: question number, stored as given.
            packet: stored as given.
            tournament: stored as given.
        """
        # A fresh list per instance, so no default is ever shared.
        self.parts = parts if parts else []
        self.answers = answers if answers else []
        self.values = values if values else []

        self.number = number
        self.packet = packet
        self.tournament = tournament

        self.leadin = re.sub(num_regex, '', leadin)
        self.leadin = re.sub(tb_regex, '', self.leadin)

        strong_tag = '<strong>'
        if self.leadin.startswith(strong_tag):
            # Raw strings: '\d', '\.' and '\s' are invalid escapes in a plain
            # literal and trigger warnings on current Python versions.
            for prefix_pattern in (r'^[\d]+\.[\s]*', r'^TB\.[\s]*'):
                self.leadin = strong_tag + re.sub(prefix_pattern, '',
                                                  self.leadin[len(strong_tag):])

        def clean_answer(ans):
            """Collapse adjacent strong/em tag pairs into <req> tags."""
            for markup, replacement in (('<strong><em>', '<req>'),
                                        ('<em><strong>', '<req>'),
                                        ('</em></strong>', '</req>'),
                                        ('</strong></em>', '</req>')):
                ans = ans.replace(markup, replacement)
            return ans

        self.answers = [clean_answer(answer) for answer in self.answers]
        self.answers_sanitized = [sanitize(answer) for answer in self.answers]
        self.leadin_sanitized = sanitize(self.leadin)
        self.parts_sanitized = [sanitize(part) for part in self.parts]
コード例 #2
0
ファイル: Packet.py プロジェクト: alopezlago/packet_parser
    def prepare_html_file(self, html_file, skip_lines=0):
        """Read a UTF-8 HTML packet and return its cleaned content lines.

        ``<br />`` tags become newlines, every line is sanitized (keeping only
        ``em`` and ``strong`` tags), everything before the first line accepted
        by ``starts_with_number`` is dropped, and lines that are blank after
        sanitizing, 20 characters or shorter, or that mention "Tossups" /
        "Bonuses" (case-insensitive) are filtered out.

        Args:
            html_file: path of the HTML file to read.
            skip_lines: number of leading result lines to drop.

        Returns:
            list of stripped lines, minus the first ``skip_lines`` entries.
        """
        with codecs.open(html_file, 'r', encoding='utf-8') as f:
            packet_contents = f.read()

        packet_contents = packet_contents.replace('<br />', '\n')

        # Must be a real list: it is iterated and then sliced below, which a
        # lazy Python 3 ``map`` object supports only once / not at all.
        packet_contents = [sanitize(line, valid_tags=['em', 'strong'])
                           for line in packet_contents.split('\n')]

        # Skip all of the packet information at the beginning
        first_line_with_number_index = 0
        for line in packet_contents:
            if starts_with_number(line):
                break
            first_line_with_number_index += 1

        packet_contents = packet_contents[first_line_with_number_index:]

        packet_contents = [x.strip() for x in packet_contents
                           if sanitize(x).strip() != ''
                           and len(x) > 20
                           and not re.search('Tossups', x, flags=re.I)
                           and not re.search('Bonuses', x, flags=re.I)]

        return packet_contents[skip_lines:]
コード例 #3
0
    def __init__(self, polynomial, divisor, verbose=True):
        """Divide ``polynomial`` by ``divisor`` and keep quotient and remainder.

        Both inputs are passed through ``utils.sanitize`` (the original note
        says this removes spaces).  If either one contains no "x",
        ``throw_no_x`` is called with the name of the offending input.  The
        work is logged through ``log_work`` and the outcome is stored on
        ``self.quotient`` and ``self.remainder``.
        """
        # sanitize the input - remove spaces
        self.polynomial = utils.sanitize(polynomial)
        self.divisor = utils.sanitize(divisor)
        self.verbose = verbose

        # Only the first input lacking an "x" is reported.
        for label, expression in (("polynomial", self.polynomial),
                                  ("divisor", self.divisor)):
            if "x" not in expression:
                self.throw_no_x(label)
                break

        self.log_work("\n-----")
        self.log_work("Input:")
        self.log_work("\tPolynomial:\t", self.polynomial)
        self.log_work("\tDivisor:\t", self.divisor)
        self.log_work("Processing:")

        # coefficients of the polynomial, then the long division itself
        ordered_coefficients = self.process_polynomial()
        division_steps = self.process_divisor(ordered_coefficients)
        self.quotient, self.remainder = self.combine_result(division_steps)

        self.log_work("Result:")
        self.log_work("\tQuotient:\t", self.quotient)
        self.log_work("\tRemainder:\t", self.remainder)
        self.log_work("-----")
コード例 #4
0
	def parse_households(self, tr_elem):
		"""Convert ``tr_elem['households']`` in place to an int.

		Does nothing when the key is absent.  A list value contributes only its
		first element; the value is passed through ``sanitize`` and commas are
		removed before the int conversion.
		"""
		if 'households' not in tr_elem:
			return

		raw = tr_elem['households']
		# isinstance instead of an exact type comparison, so list subclasses
		# are unwrapped as well.
		if isinstance(raw, list):
			raw = raw[0]
		tr_elem['households'] = int(sanitize(raw).replace(',', ''))
コード例 #5
0
	def parse_electorate(self, tr_elem):
		"""Convert ``tr_elem['electorates']`` in place to an int.

		Returns immediately when the key is missing.  If the value is a list,
		only its first element is used.  The value goes through ``sanitize``
		and has its commas removed before being converted with ``int``.
		"""
		key = 'electorates'
		if key not in tr_elem:
			return

		# isinstance replaces the exact ``type(x) == type([])`` comparison.
		value = tr_elem[key][0] if isinstance(tr_elem[key], list) else tr_elem[key]
		tr_elem[key] = int(sanitize(value).replace(',', ''))
コード例 #6
0
	def parse_population(self, tr_elem):
		"""Convert ``tr_elem['population']`` in place to an int.

		No-op when the key is absent.  A list value is reduced to its first
		element (the original code carried the note "nth != 20" on this
		branch); the value is sanitized and stripped of commas before the
		``int`` conversion.
		"""
		if 'population' not in tr_elem:
			return

		population = tr_elem['population']
		# isinstance is the idiomatic type check and also accepts subclasses.
		if isinstance(population, list):
			population = population[0]
		cleaned = sanitize(population).replace(',', '')
		tr_elem['population'] = int(cleaned)
コード例 #7
0
	def parse_counted_votes(self, consti):
		"""Convert ``consti['counted_votes']`` in place to an int.

		Does nothing when the key is missing.  Per the original notes the
		value is a list for historical elections (first element used) and a
		plain value for recent elections.  It is sanitized and its commas are
		removed before the int conversion.
		"""
		if 'counted_votes' not in consti:
			return

		votes = consti['counted_votes']
		if isinstance(votes, list):  # historical elections
			votes = votes[0]
		# otherwise: recent elections, value is used as-is
		consti['counted_votes'] = int(sanitize(votes).replace(',', ''))
コード例 #8
0
	def parse_electorate(self, consti):
		"""Convert ``consti['electorates']`` in place to an int.

		No-op when the key is absent.  Per the original notes the value is a
		list for historical elections (only the first element is used) and a
		plain value for recent elections.  The value is sanitized and commas
		are dropped before converting.
		"""
		if 'electorates' not in consti:
			return

		electorates = consti['electorates']
		if isinstance(electorates, list):  # historical elections
			electorates = electorates[0]
		# otherwise: recent elections
		without_commas = sanitize(electorates).replace(',', '')
		consti['electorates'] = int(without_commas)
コード例 #9
0
ファイル: Tossup.py プロジェクト: alchzh/QuizDB
 def to_dict(self):
     """Return the tossup as a plain dict.

     The formatted text/answer are stored as-is; "text" and "answer" hold the
     same values passed through ``sanitize`` with an empty ``valid_tags`` list.
     """
     plain_text = sanitize(self.text, valid_tags=[])
     plain_answer = sanitize(self.answer, valid_tags=[])
     return dict(
         number=self.number,
         formatted_text=self.text,
         formatted_answer=self.answer,
         text=plain_text,
         answer=plain_answer,
         category=self.category,
         subcategory=self.subcategory,
         tournament=self.tournament,
         round=self.round,
     )
コード例 #10
0
 def to_dict(self):
     """Return the bonus as a plain dict.

     "formatted_texts" / "formatted_answers" hold the stored values; "texts"
     and "answers" hold lists of the same items passed through ``sanitize``
     with an empty ``valid_tags`` list.
     """
     return {
         "number": self.number,
         "leadin": self.leadin,
         "formatted_texts": self.texts,
         "formatted_answers": self.answers,
         # Lists, not map(): on Python 3 a map object is a one-shot iterator
         # and cannot be indexed or serialized.
         "texts": [sanitize(text, valid_tags=[]) for text in self.texts],
         "answers": [sanitize(answer, valid_tags=[]) for answer in self.answers],
         "category": self.category,
         "subcategory": self.subcategory,
         "tournament": self.tournament,
         "round": self.round
     }
コード例 #11
0
ファイル: Bonus.py プロジェクト: Innoviox/QuizDB
 def to_dict(self):
     """Return the bonus as a plain dict with formatted and sanitized fields."""

     def without_tags(items):
         # sanitize every item with no tags whitelisted
         return [sanitize(item, valid_tags=[]) for item in items]

     plain_texts = without_tags(self.texts)
     plain_answers = without_tags(self.answers)
     return dict(
         number=self.number,
         leadin=self.leadin,
         formatted_texts=self.texts,
         formatted_answers=self.answers,
         texts=plain_texts,
         answers=plain_answers,
         category=self.category,
         subcategory=self.subcategory,
         tournament=self.tournament,
         round=self.round,
     )
コード例 #12
0
def plot_boundary(X, y, coefs, title='', save=False):
    """Plot the two classes of ``X`` together with a linear decision boundary.

    The boundary is the line where ``b + w1 * x1 + w2 * x2`` equals
    ``utils.logit(1 / 2)``, drawn over the x1 range of the data widened by a
    margin of 1 on each side.

    Args:
        X: array indexed as ``X[:, 0]`` / ``X[:, 1]`` for the two features.
        y: labels; rows with ``y == 0`` and ``y == 1`` are scattered separately.
        coefs: mapping with keys 'b', 'w1' and 'w2'.
        title: figure title; also used (sanitized) as the file name when saving.
        save: when true, the figure is written under ``results/``.
    """
    offset = 1
    first_feature, second_feature = X[:, 0], X[:, 1]
    x_min, x_max = np.min(first_feature), np.max(first_feature)
    y_min, y_max = np.min(second_feature), np.max(second_feature)
    x_limits = (x_min - offset, x_max + offset)
    y_limits = (y_min - offset, y_max + offset)

    bias, w1, w2 = coefs['b'], coefs['w1'], coefs['w2']
    boundary_x = np.linspace(x_limits[0], x_limits[1], 1000)
    boundary_y = []
    for x in boundary_x:
        boundary_y.append((utils.logit(1 / 2) - bias - w1 * x) / w2)

    plt.figure(figsize=(9, 9))
    plt.plot(boundary_x, boundary_y, c='g', label='Decision boundary')
    for class_label in (0, 1):
        members = y == class_label
        plt.scatter(X[members, 0], X[members, 1],
                    label='class {}'.format(class_label))

    plt.xlim(*x_limits)
    plt.ylim(*y_limits)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.legend(loc="upper right", fontsize=16)
    if save:
        print("Save figure results folder.")
        plt.savefig('results/{}'.format(utils.sanitize(title)))
    plt.show()
コード例 #13
0
def view_format_compare():
  """Render per-format averages for the class posted in the form.

  A GET request renders the empty template.  Otherwise the sanitized ``cls``
  form value selects one of two queries: 'medic' reports heals per minute and
  the uber/drop ratio, every other class reports KAD and damage per minute.
  Each row is appended to ``result['formatlist']`` as a dict.
  """
  result = {'errmsg':'', 'formatlist':[], 'class':""}
  if request.method == "GET":
    return render_template('format_compare.html', **result)

  cls = utils.sanitize(request.form['cls'])
  result['class'] = cls
  rows = result['formatlist']
  if cls == 'medic':
    qrystr = """SELECT PF.format, AVG(PF.healsPerMin) as avg_HPM, AVG(CAST(PF.ubers AS decimal)/CAST(PF.drops AS decimal)) as avg_UD_rate
FROM PlaysFormat PF
WHERE class=%s AND PF.drops <> 0
GROUP BY PF.format;"""
    # NOTE: the parameter is passed as a bare value, not a 1-tuple.
    for record in g.conn.execute(qrystr, cls):
      print(record)
      rows.append({'format': record[0],
                   'avg_hpm': record[1],
                   'avg_udrate': float(record[2])})
  else:
    qrystr = """SELECT PF.format, AVG(PF.kad) as avg_KAD, AVG(PF.damagepermin) as avg_DPM
FROM PlaysFormat AS PF
WHERE class=%s AND PF.deaths <> 0 
GROUP BY PF.format;"""
    column_names = ['format', 'avg_kad', 'avg_dpm']
    for record in g.conn.execute(qrystr, cls):
      print(record)
      rows.append(dict(zip(column_names, record)))

  return render_template('format_compare.html', **result)
コード例 #14
0
ファイル: event.py プロジェクト: josbouten/bioplot
    def onEvent(self, event):
        """
        Callback function handling key presses;
        will write figure to disk.

        The file name is built from ``self.title`` and ``self.plotType``,
        passed through ``sanitize`` and saved as PNG inside the configured
        output path, which is created first when missing.  If the directory
        cannot be created the error is printed and the process exits with
        status 1.

        :param event: key press event; only ``event.key`` is read, for the
            debug message
        :return: nothing
        """
        if self.debug:
            # '{}' rather than '{:s}': the key may be None (matplotlib reports
            # None for keys it cannot name) and '{:s}' raises TypeError then.
            print("You pressed key {}".format(event.key))
        filename = self.title + "_" + self.plotType + '.png'

        # Spaces in filenames are a nuisance.
        filename = sanitize(filename)

        outputPath = self.config.getOutputPath()
        try:
            if not os.path.exists(outputPath):
                makedirs(outputPath)
        except Exception as e:
            print(e)
            sys.exit(1)

        # Note: l, k, g, s and f are predefined keys
        # With them you can:
        # k: toggle between lin horizontal scale and log horizontal scale
        # l: toggle between lin vertical scale and log vertical scale
        # s: open save menu
        # f: toggle between standard size and full screen
        # Any other key saves the file in its current dimensions.
        # To get a nice plot it is wise to maximise and then press any key. Then close
        # the window.

        path = outputPath + os.path.sep + filename
        self.fig.savefig(path, bbox_inches=0)
        print('Figure was saved to:', path)
コード例 #15
0
    def onEvent(self, event):
        """
        Callback function handling key presses;
        will write figure to disk.

        Builds ``<title>_<plotType>.png``, sanitizes the name, makes sure the
        configured output directory exists (printing the error and exiting
        with status 1 if it cannot be created) and saves ``self.fig`` there.

        :param event: key press event; only ``event.key`` is used, and only
            for the debug message
        :return: nothing
        """
        if self.debug:
            # Default formatting instead of '{:s}', which raises TypeError
            # when event.key is None (possible for unnamed keys).
            print("You pressed key {}".format(event.key))

        # Spaces in filenames are a nuisance.
        filename = sanitize(self.title + "_" + self.plotType + '.png')

        targetDir = self.config.getOutputPath()
        try:
            if not os.path.exists(targetDir):
                makedirs(targetDir)
        except Exception as e:
            print(e)
            sys.exit(1)

        # Note: l, k, g, s and f are predefined keys
        # With them you can:
        # k: toggle between lin horizontal scale and log horizontal scale
        # l: toggle between lin vertical scale and log vertical scale
        # s: open save menu
        # f: toggle between standard size and full screen
        # Any other key saves the file in its current dimensions.
        # To get a nice plot it is wise to maximise and then press any key. Then close
        # the window.

        path = targetDir + os.path.sep + filename
        self.fig.savefig(path, bbox_inches=0)
        print('Figure was saved to:', path)
コード例 #16
0
        def do_parse(ruthless):
            """Extract the main article from a deep copy of ``self.html``.

            :param ruthless: when true, ``utils.remove_unlikely_candidates`` is
                applied to the tree before paragraphs are scored.
            :return: the result of ``utils.sanitize`` on the selected article,
                or None when no candidate was selected.
            :raises Unparseable: wraps any StandardError raised while parsing,
                re-raised with the original traceback.
            """
            try:
                # Work on a copy so the tree stored on self is left untouched.
                html = deepcopy(self.html)
                # Drop script/style subtrees before any scoring happens.
                for i in utils.tags(html, 'script', 'style'):
                    i.drop_tree()
                for i in utils.tags(html, 'body'):
                    i.set('id', 'readabilityBody')
                if ruthless:
                    html = utils.remove_unlikely_candidates(html)
                html = utils.transform_misused_divs_into_paragraphs(html)

                candidates = utils.score_paragraphs(html)

                # first try to get an article
                article_node = utils.get_article_element(html)
                if article_node:
                    best_candidate = article_node
                else:
                    # fall back to the scored candidates
                    best_candidate = select_best_candidate(candidates)

                if best_candidate:
                    # TODO: there was some logic here about retrying if the article wasn't long enough
                    return utils.sanitize(utils.get_article(candidates, best_candidate), candidates)
                else:
                    return None
            except StandardError, e:
                log.exception('error getting summary: ')
                # Python 2 three-argument raise: keeps the original traceback.
                raise Unparseable(str(e)), None, sys.exc_info()[2]
コード例 #17
0
ファイル: builder.py プロジェクト: jayzeng/us-shapes
    def build_state_shapes(self, outfile, raw_geodir='raw_geoshapes', raw_geofile='raw_shapes_state.json'):
        """Download state shapes, convert them to GeoJSON and append one
        formatted document per state to ``outfile``.

        Args:
            outfile: path of the output file; opened in append mode.
            raw_geodir: forwarded to ``self.converter.to_geojson``.
            raw_geofile: forwarded to ``self.converter.to_geojson``.

        Lines of the converted file that do not match ``self.good_line_re``,
        or that lack the expected STUSPS/NAME/geometry fields, are skipped.
        """
        print("Building state shape files")

        shapefiles_dir = download_state_shapes()
        geofile_path = self.converter.to_geojson(raw_geofile=raw_geofile, raw_geodir=raw_geodir,
                                                 shapefile_prefix='state', shapefiles_dir=shapefiles_dir)

        # Format results

        # Raw string: the pattern is full of backslash escapes.
        raw_geo_re = re.compile(
            r'^\{.*?: \{.*?"STUSPS": "([^"]+)", "NAME": "([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')

        doc_template = '{"id": "%(id)s", "state": "%(state)s", "postal": "%(postal)s", "geometry": %(coordinates)s}\n'

        with open(outfile, 'a') as out:
            print("Formatting %s into output file %s" % (geofile_path, outfile))

            for line in fileinput.input(geofile_path):
                if self.good_line_re.match(line) is None:
                    continue

                m = raw_geo_re.match(line)
                if m is None:
                    # A "good" line without the expected fields would crash
                    # on m.group(); skip it like other unusable lines.
                    continue

                postal, state, coordinates = m.group(1), m.group(2), m.group(3)

                data = {
                    'id': sanitize(state),  # id is derived from the state name
                    'state': state,
                    'postal': postal,
                    'coordinates': coordinates
                }

                out.write(doc_template % data)
コード例 #18
0
async def define(self, message, args):
    """Attach a description to an existing mapping.

    The first argument is the mapping name (sanitized before the lookup);
    the remaining arguments are joined with spaces to form the definition.

    Arguments:
        self {discordClient} -- Needed
        message {discordMessage} -- The actual message that invoked this command
        args {list[str]} -- Everything that is after the command
    """
    guild_id = message.guild.id

    # Fewer than two arguments: either nothing at all or a name with no definition.
    if len(args) < 2:
        problem = "No definition was given" if args else "No arguments were found"
        return await disc.error_message(message, title="Error", desc=problem)

    raw_name, *definition_words = args
    name = utils.sanitize(raw_name)
    if not db.mappings_exists(guild_id, name):
        return await disc.send_message(message,
                                       title="Error",
                                       desc=f"{name} was not a valid mapping")

    definition = " ".join(definition_words)
    db.mappings_def(guild_id, name, definition)
    await disc.send_message(
        message,
        title="Definition set",
        desc=f"The mapping {name} has been defined to `{definition}`")
コード例 #19
0
ファイル: Bonus.py プロジェクト: alopezlago/packet_parser
    def __init__(self, leadin='', parts=None, answers=None, values=None, number='',
                 packet=None, tournament=None):
        """Build a bonus from its raw (HTML) leadin, parts and answers.

        The leadin is run through the module-level ``num_regex`` and
        ``tb_regex`` substitutions; when it starts with ``<strong>`` a leading
        "12." style number and a leading "TB." marker inside the tag are also
        removed.  Adjacent strong/em tag pairs in each answer are collapsed
        into ``<req>`` / ``</req>``, and sanitized copies of the leadin, parts
        and answers are stored next to the formatted ones.

        Args:
            leadin: leadin text, possibly wrapped in ``<strong>``.
            parts: list of bonus parts (default: a new empty list).
            answers: list of answers (default: a new empty list).
            values: list of part values (default: a new empty list).
            number: question number, stored as given.
            packet: stored as given.
            tournament: stored as given.
        """
        # None sentinels instead of ``[]`` defaults: a mutable default is one
        # shared object, so every Bonus built without arguments would have
        # stored (and could mutate) the same lists.
        self.parts = [] if parts is None else parts
        self.answers = [] if answers is None else answers
        self.values = [] if values is None else values
        self.number = number
        self.packet = packet
        self.tournament = tournament

        self.leadin = re.sub(num_regex, '', leadin)
        self.leadin = re.sub(tb_regex, '', self.leadin)

        strong_tag = '<strong>'
        if self.leadin.startswith(strong_tag):
            # Raw strings avoid the invalid '\d' / '\s' / '\.' escapes.
            for prefix_pattern in (r'^[\d]+\.[\s]*', r'^TB\.[\s]*'):
                self.leadin = strong_tag + re.sub(prefix_pattern, '',
                                                  self.leadin[len(strong_tag):])

        def clean_answer(ans):
            """Collapse adjacent strong/em tag pairs into <req> tags."""
            ans = ans.replace('<strong><em>', '<req>')
            ans = ans.replace('<em><strong>', '<req>')
            ans = ans.replace('</em></strong>', '</req>')
            ans = ans.replace('</strong></em>', '</req>')
            return ans

        # Real lists rather than map(): on Python 3 a map object is a
        # one-shot iterator, so the sanitizing pass below would exhaust
        # self.answers.
        self.answers = [clean_answer(answer) for answer in self.answers]

        self.answers_sanitized = [sanitize(answer) for answer in self.answers]
        self.leadin_sanitized = sanitize(self.leadin)
        self.parts_sanitized = [sanitize(part) for part in self.parts]
コード例 #20
0
async def unmap(self, message, args):
    """Remove a mapping for the guild of the invoking message.

    The first argument is sanitized and looked up; when it exists it is
    removed through ``db.mappings_rm``.  (The original description also
    mentions removing a file; presumably that happens inside the db helper --
    not visible here.)

    Arguments:
        self {discordClient} -- Needed
        message {discordMessage} -- The actual message that invoked this command
        args {list[str]} -- Everything that is after the command
    """
    guild_id = message.guild.id
    if not args:
        return await disc.error_message(message,
                                        title="Error",
                                        desc="No arguments were found")

    name = utils.sanitize(args[0])
    mapping_known = db.mappings_exists(guild_id, name)
    if mapping_known:
        db.mappings_rm(guild_id, name)
        await disc.send_message(message,
                                title="Success !",
                                desc=f"You have successfully unmapped {name}")
        return None

    return await disc.send_message(message,
                                   title="Error",
                                   desc=f"{name} was not a valid mapping")
コード例 #21
0
 def handle(self, *args, **options):
     """Load words from ``options['file']`` and save each as a ``Word``.

     Every line is sanitized before being saved.  Loading stops once the
     number of saved words equals ``options['limit']``.  Debug logging is
     enabled when ``options['debug']`` is truthy.
     """
     if options['debug']:
         logging.basicConfig(level=logging.DEBUG)

     count = 0  # stays 0 when the file has no lines
     with open(options['file'], 'r') as word_file:
         for count, line in enumerate(word_file, 1):
             Word(word=sanitize(line)).save()
             if options['limit'] == count:
                 break

     self.stdout.write('Successfully loaded {} words'.format(count))
コード例 #22
0
    def __init__(self, dataset, name, file_id=None):
        """Describe a file that belongs to ``dataset``.

        The name is stored sanitized.  The download URL and the edit-media URI
        are built from the host of the dataset's connection and the file id.
        """
        self.dataset = dataset
        self.name = sanitize(name)
        self.id = file_id

        host = dataset.connection.host
        self.download_url = 'http://{0}/api/access/datafile/{1}'.format(host, self.id)
        self.edit_media_uri = (
            'https://{0}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{1}'
        ).format(host, self.id)
コード例 #23
0
ファイル: query.py プロジェクト: pombredanne/hitsearch
def get_results(query, sort_type='authority', beta=0.0):
    """Queries the database for Page objects matching a query, gets their HITS
       values, weights them by word frequency, and returns a list of sorted
       results.

       sort_type can be either authority or hubbiness; any other value sorts
       by authority.
       beta is the weight of the word frequency vs. the HITS results:
       when beta=0.0 HITS results are all that matters and word frequency is
       multiplied by zero;
       when beta=1.0 results are based solely on word frequency.

       Returns an empty list when no page matches every term."""

    # sanitize: strips accents and punctuation (per the original note)
    terms = [utils.sanitize(term).lower() for term in query.split()]
    # set for O(1) membership tests in the tag loop below
    term_set = set(terms)

    pages = Page.objects.select_related('tag__word_count')
    for term in terms:
        pages = pages.filter(tag__tag=term)

    if not pages:
        return []

    # links_query yields (source_url, target_url) tuples
    links_query = Link.objects.filter(source__in=pages).values_list('source__url', 'target__url')

    # now build the links dict to pass into HITS
    links = {page.url: [] for page in pages}
    for source_url, target_url in links_query:
        links[source_url].append(target_url)

    # run HITS
    (authority, hubbiness) = HITS.HITS(links)

    # give a boost based on word frequency
    tags_query = Tag.objects.filter(page__in=pages).values_list('page__url', 'word_count', 'tag')

    # {page.url: [count of all search terms in page, count of all tags in page]}
    tags = {page.url: [0, 0] for page in pages}
    for page_url, word_count, tag in tags_query:
        if tag in term_set:
            tags[page_url][0] += word_count
        tags[page_url][1] += word_count

    # assign the pages hubbiness and authority, weighted with term frequency
    for page in pages:
        term_count, total_count = tags[page.url]
        # A page whose tag counts sum to zero gets no frequency boost
        # instead of raising ZeroDivisionError.
        if total_count:
            frequency_boost = beta * term_count / float(total_count)
        else:
            frequency_boost = 0.0
        page.authority = (1 - beta) * authority[page.url] + frequency_boost
        page.hubbiness = (1 - beta) * hubbiness[page.url] + frequency_boost

    # sort the pages, best first; the other score breaks ties
    if sort_type == 'hubbiness':
        sorter = lambda page: (page.hubbiness, page.authority)
    else:
        sorter = lambda page: (page.authority, page.hubbiness)

    return sorted(pages, key=sorter, reverse=True)
コード例 #24
0
ファイル: Packet.py プロジェクト: Innoviox/QuizDB
 def load_html(self):
     """Read ``self.filename`` (UTF-8) and return its usable content lines.

     Every physical line is split on ``<br>`` / ``<br/>`` / ``<br />`` tags;
     each fragment is sanitized and stripped, and kept only when
     ``is_valid_content`` accepts it.
     """
     with codecs.open(self.filename, 'r', encoding='utf-8') as f:
         lines = f.readlines()

     # Raw string ('\s' is an invalid escape in a plain literal), compiled once.
     line_break_re = re.compile(r'<br\s*/?>')

     prepared_lines = []
     for line in lines:
         for fragment in line_break_re.split(line):
             sanitized_line = sanitize(fragment).strip()
             if is_valid_content(sanitized_line, strippable_lines_res=self.strippable_lines_res):
                 prepared_lines.append(sanitized_line)
     return prepared_lines
コード例 #25
0
def add():
    """Store the ``thing`` URL parameter and return a confirmation message.

    Returns an error message when the parameter is missing, empty or made up
    of whitespace only.  The value is sanitized before it is stored and
    HTML-escaped before it is echoed back.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from html import escape

    try:
        thing = request.args.get("thing")
    except AttributeError:
        return "ERROR: That doesn't look right. URL param `thing` can't be empty."

    # ``is None`` for the identity check; strip() also rejects values made of
    # several spaces or tabs, which the old ``!= " "`` comparison let through.
    if thing is None or not thing.strip():
        return "[⚠] ERROR: That doesn't look right. URL param `thing` can't be empty."

    database.add(utils.sanitize(thing))
    # The parameter is caller-controlled; escape it so markup in it is not
    # interpreted if the response is rendered as HTML.
    return "[✓] Thanks for reporting that Jeffrey dropped " + escape(thing)
コード例 #26
0
    def __init__(self, dataset, name, file_id=None):
        """Record a dataset file and derive its access URLs.

        ``name`` is sanitized before being stored; both URLs embed the
        connection host of ``dataset`` and the (possibly None) file id.
        """
        download_template = 'http://{0}/api/access/datafile/{1}'
        edit_media_template = 'https://{0}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{1}'

        self.dataset = dataset
        self.name = sanitize(name)
        self.id = file_id

        self.download_url = download_template.format(dataset.connection.host, self.id)
        self.edit_media_uri = edit_media_template.format(dataset.connection.host, self.id)
コード例 #27
0
	def parse_candi(self, candi):
		"""Normalize one candidate record in place.

		Depending on ``self.candidate_type`` the raw ``candi['name']`` entry is
		turned into ``party_name_kr`` and/or ``name_kr`` and then removed.  The
		raw ``candi['vote']`` entry is turned into ``votenum`` (int, commas
		removed) and ``voterate`` (float) and removed as well.

		Raises NotImplementedError for an unknown candidate type.
		"""
		kind = self.candidate_type
		if kind == 'party_candidate':
			# candi['name'] looks like <th><strong>PARTY<br>CANDIDATE</strong></th>
			party, person = [sanitize(cell) for cell in candi['name'][:2]]
			candi['party_name_kr'] = party
			candi['name_kr'] = person
		elif kind == 'party_list':
			# candi['name'] looks like <th><strong>PARTY</strong></th>
			candi['party_name_kr'] = sanitize(candi['name'])
		elif kind == 'independent_candidate':
			# candi['name'] looks like <th><strong>CANDIDATE</strong></th>
			candi['name_kr'] = sanitize(candi['name'])
		else:
			raise NotImplementedError("잘못된 candidate_type이 들어옴: one of three, 'party_candidate', 'party_list', or 'independent_candidate'")
		# every recognised type discards the raw name entry
		del candi['name']

		votenum, voterate = [sanitize(cell) for cell in candi['vote'][:2]]
		candi['votenum'] = int(votenum.replace(',', ''))
		candi['voterate'] = float(voterate)
		del candi['vote']
コード例 #28
0
ファイル: formatter.py プロジェクト: knockrentals/us-shapes
    def format_neighborhood_shapes(self, outfile):
        """Download neighborhood shapes, convert them to GeoJSON and append
        one formatted document per neighborhood to ``outfile``.

        Nothing is done when ``outfile`` already exists.  Lines of the
        converted file that do not match ``self.good_line_re``, or that lack
        the expected STATE/CITY/NAME/geometry fields, are skipped.

        Args:
            outfile: path of the output file; opened in append mode.
        """
        if isfile(outfile):
            print("%s already exists, skipping formatting geofile" % outfile)
            return

        print("Building neighborhood shape files")

        shapefiles_dir = loader.download_neighborhood_shapes()

        raw_geofile = '%s/raw_shapes_neighborhood.json' % self.raw_geo_dir
        self.converter.to_geojson(outfile=raw_geofile, shapefile_prefix='neighborhood', shapefiles_dir=shapefiles_dir)

        # Format results

        # Raw string: the pattern is full of backslash escapes.  Group 1 is
        # the line prefix; the wanted fields are groups 2-5.
        raw_geo_re = re.compile(
            r'^(\{.*?)"STATE":\s*"([^"]+)".*?"CITY":\s*"([^"]+)".\s*"NAME":\s*"([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')

        doc_template = '{"id": "%(id)s", "state": "%(state)s", "city": "%(city)s", "neighborhood": "%(neighborhood)s", "center_lat": %(center_lat)s, "center_lon": %(center_lon)s, "geometry": %(coordinates)s}\n'

        with open(outfile, 'a') as out:
            print("Formatting %s into output file %s" % (raw_geofile, outfile))

            for line in fileinput.input(raw_geofile):
                if self.good_line_re.match(line) is None:
                    continue

                m = raw_geo_re.match(line)
                if m is None:
                    # A "good" line without the expected fields would crash
                    # on m.group(); skip it like other unusable lines.
                    continue

                state, city, neighborhood, coordinates = m.group(2), m.group(3), m.group(4), m.group(5)
                shape_id = sanitize("%s_%s_%s" % (neighborhood, city, state))

                # hack because there's no center included in neighborhood shape file
                # we'll use the first coord we find as the center
                c = json.loads(coordinates)['coordinates']
                # descend through nested lists until the first coordinate pair
                while isinstance(c, list) and c and isinstance(c[0], list):
                    c = c[0]

                # the pair is read as (lon, lat)
                center_lon = c[0]
                center_lat = c[1]

                data = dict(
                    id=shape_id,
                    neighborhood=neighborhood,
                    city=city,
                    state=state,
                    center_lat=center_lat,
                    center_lon=center_lon,
                    coordinates=coordinates
                )

                out.write(doc_template % data)
コード例 #29
0
ファイル: routes.py プロジェクト: poulp/randomfoOd
def add_action():
    """Add an action.

    Builds the action URI from the posted label, creates the ``Action``
    between loading and saving the actions RDF store, and returns the URI as
    JSON.
    """
    store_file = STORE['actions']
    payload = request.json
    label = payload['label']

    action_uri = create_uri(BASE_URI_ACTION + sanitize(label))

    # The resource is instantiated between load and save of the store file.
    load_rdf_file(store_file)
    Action(resUri=action_uri, label=label)
    save_rdf_file(store_file)

    # The JSON body carries the URI of the new action.
    return jsonify(uri=action_uri)
コード例 #30
0
ファイル: routes.py プロジェクト: poulp/randomfoOd
def add_utensil():
    """Add a utensil.

    Builds the utensil URI from the posted label, converts the posted action
    identifiers with ``create_uri``, creates the ``Utensil`` between loading
    and saving the utensils RDF store, and returns the new URI as JSON.
    """
    store_file = STORE['utensils']
    payload = request.json
    label = payload['label']
    action_uris = []
    for raw_uri in payload['actions']:
        action_uris.append(create_uri(raw_uri))

    utensil_uri = create_uri(BASE_URI_UTENSIL + sanitize(label))

    # The resource is instantiated between load and save of the store file.
    load_rdf_file(store_file)
    Utensil(resUri=utensil_uri, label=label, actions=action_uris)
    save_rdf_file(store_file)

    return jsonify(uri=utensil_uri)
コード例 #31
0
ファイル: Packet.py プロジェクト: grapesmoker/packet_parser
    def prepare_html_file(self, html_file, skip_lines=0):
        """Clean an HTML packet, rewrite the file in place and return its lines.

        ``<br />`` tags are removed and each line is sanitized (keeping only
        ``em`` and ``strong``).  When both a "Tossups" and a "Bonuses" heading
        line are found (case-insensitive), only the lines after the tossups
        heading are kept, minus the bonuses heading line itself.  Lines that
        are blank after sanitizing, 20 characters or shorter, equal to
        "Extra"/"Extras", or that start with a tag (literal or HTML-escaped)
        are dropped.  The surviving lines are written back to ``html_file``.

        Args:
            html_file: path of the UTF-8 HTML file; it is overwritten.
            skip_lines: number of leading result lines to leave out of the
                return value (the file still receives every line).

        Returns:
            list of cleaned lines, minus the first ``skip_lines`` entries.
        """
        with codecs.open(html_file, 'r', encoding='utf-8') as f:
            packet_contents = f.read()

        packet_contents = packet_contents.replace('<br />', '')

        packet_contents = [sanitize(element, valid_tags=['em', 'strong'])
                           for element in packet_contents.split('\n')]

        tossups_start = None
        bonuses_start = None
        for i, item in enumerate(packet_contents):
            # ``is None`` rather than truthiness: index 0 is a valid position
            # and must not be treated as "not found yet".
            if tossups_start is None and re.search('Tossups', item, flags=re.I):
                tossups_start = i + 1
            elif bonuses_start is None and re.search('Bonuses', item, flags=re.I):
                bonuses_start = i

        if tossups_start is not None and bonuses_start is not None:
            tossups = packet_contents[tossups_start:bonuses_start]
            bonuses = packet_contents[bonuses_start + 1:]
            packet_contents = tossups + bonuses

        packet_contents = [x.strip() for x in packet_contents
                           if sanitize(x).strip() != ''
                           and len(x) > 20
                           and x.strip() not in ['Extra', 'Extras']
                           and not re.search('^(<.*>|&lt;.*&gt;)', x.strip())]

        # Write back with the encoding the file was read with; a plain open()
        # uses the platform default and can fail or corrupt non-ASCII text.
        with codecs.open(html_file, 'w', encoding='utf-8') as f:
            for item in packet_contents:
                f.write(item + '\n')

        return packet_contents[skip_lines:]
コード例 #32
0
ファイル: Tossup.py プロジェクト: alopezlago/packet_parser
    def __init__(self, question='', answer='', number='', packet=None, tournament=None):
        """Build a tossup from its raw (HTML) question and answer.

        The question is run through the module-level ``num_regex`` and
        ``tb_regex`` substitutions; when it starts with ``<strong>`` a leading
        "12." style number and a leading "TB." marker inside the tag are also
        removed.  Adjacent strong/em tag pairs in the answer are collapsed
        into ``<req>`` / ``</req>``.  Sanitized copies of both texts are
        stored as ``question_sanitized`` and ``answer_sanitized``.

        Args:
            question: question text, possibly wrapped in ``<strong>``.
            answer: answer text.
            number: question number, stored as given.
            packet: stored as given.
            tournament: stored as given.
        """
        self.number = number

        self.question = re.sub(num_regex, '', question)
        self.question = re.sub(tb_regex, '', self.question)

        strong_tag = '<strong>'
        if self.question.startswith(strong_tag):
            # Raw strings: '\d', '\.' and '\s' are invalid escapes in a plain
            # literal and trigger warnings on current Python versions.
            for prefix_pattern in (r'^[\d]+\.[\s]*', r'^TB\.[\s]*'):
                self.question = strong_tag + re.sub(prefix_pattern, '',
                                                    self.question[len(strong_tag):])

        self.answer = answer
        for markup, replacement in (('<strong><em>', '<req>'),
                                    ('<em><strong>', '<req>'),
                                    ('</em></strong>', '</req>'),
                                    ('</strong></em>', '</req>')):
            self.answer = self.answer.replace(markup, replacement)

        self.answer_sanitized = sanitize(self.answer, [])
        self.question_sanitized = sanitize(self.question, [])

        self.packet = packet
        self.tournament = tournament
コード例 #33
0
ファイル: notes.py プロジェクト: dmych/cn
    def _renameNote(self, rec, first_line):
        """Pick a free file name derived from ``first_line`` and rename the note.

        The base name is the sanitized, lower-cased first line with a ``.txt``
        extension; ``-1``, ``-2``, ... is appended while a file of that name
        already exists in ``self.path``.  The note's current file is renamed
        only when the record has a file name and that file exists.

        Returns the new file name (without directory).
        """
        # Indentation uses spaces only: the previous mix of tabs and spaces
        # is a TabError on Python 3.
        basename = sanitize(first_line).lower()
        newfilename = basename + '.txt'
        suffix = 1
        while os.path.exists(os.path.join(self.path, newfilename)):
            newfilename = '%s-%s.txt' % (basename, suffix)
            suffix += 1
        npath = os.path.join(self.path, newfilename)
        opath = self._filepath(rec)
        if rec['filename'] and os.path.exists(opath):
            log('RENAME %s TO %s' % (opath, npath))
            os.rename(opath, npath)
        log('NEW FILENAME: %s' % newfilename)
        return newfilename
コード例 #34
0
def main():
    """Run the DNA matching pipeline on the command-line arguments.

    Every step works on names defined outside this function (``argv``,
    ``dbFile``, ``tmp``, ``dnaNameList``, ``results``); the calls below rely
    on being executed in this order.
    """

    # ensure proper input format from command line
    sanitize(argv)

    # initialize the dnaDB dict using the dictionary file identified in input
    dnaDB = initDict(argv, dbFile, tmp)

    # read the DNA sequence to test into memory (buffer)
    testFile = initFile(tmp, argv)

    # from the dnaDB, retrieve the names of all DNA strands we want to test the new sequence against
    # (no return value is used; presumably fills dnaNameList in place -- confirm)
    getDnaNames(dnaNameList, dnaDB)

    # find the maximum number of sequences of the DNA strands that exist in the new sequence and add to a list
    # (no return value is used; presumably fills results in place -- confirm)
    getSequences(dnaNameList, results, testFile)

    # search the dnaDB for a match between the list of sequences and entries in the dict.
    # print the name of a match, if one exists
    findMatch(dnaDB, dnaNameList, results)

    # close the DictReader
    closeFile(dbFile)
コード例 #35
0
ファイル: crawler.py プロジェクト: pombredanne/hitsearch
 def get_links(self, links):
     """Collect the valid link targets on the page together with their link text.

     For every element with a non-empty ``href`` the target is resolved
     against ``self.url`` and standardized.  When ``is_valid_link`` accepts
     it, the sanitized, lower-cased words of the link text are appended to
     ``self.links[target]``.
     """
     for link in links:
         href = link.get('href', '')
         if href == '':
             continue

         # translate link to proper url, then standardize it
         target = self.standardize_url(urlparse.urljoin(self.url, href))
         if not self.is_valid_link(target):
             continue

         # get link text, split into whitespace-separated words
         link_text = ' '.join(link.findAll(text=True))
         link_words = []
         for word in link_text.split():
             if len(word) != 0:
                 link_words.append(utils.sanitize(word).lower())

         # links are now dictionaries of url: list_of_words
         self.links[target].extend(link_words)
コード例 #36
0
ファイル: downloader.py プロジェクト: glongzh/vido
def resume_pushing():
    """Retry the upload of every task whose status is still 'pushing'.

    Tasks whose item file no longer exists are left untouched. For the
    others the file is uploaded as '<sanitized title>.mp4'; result codes 0
    and 60 mark the task 'pushed' and delete the local file, any other code
    marks it 'pushing_failed'. The task is saved either way.
    """
    pending = Task.objects.filter(status='pushing')
    if not pending:
        return

    client = ByPy()
    for task in pending:
        if not task.item.file_exist():
            continue

        source_path = task.item.get_item_path()
        remote_name = sanitize(task.item.title) + '.mp4'
        push_res = client.upload(source_path, remote_name)
        print('push_res:' + str(push_res))

        if push_res in (0, 60):
            task.status = 'pushed'
            os.remove(task.item.get_item_path())
        else:
            task.status = 'pushing_failed'
        task.save()
コード例 #37
0
ファイル: formatter.py プロジェクト: knockrentals/us-shapes
    def format_city_shapes(self, outfile):
        """Append one formatted JSON document per city shape to *outfile*.

        Does nothing when *outfile* already exists. Otherwise the city
        shapefiles are downloaded, converted to a raw GeoJSON file under
        ``self.raw_geo_dir``, and every line accepted by
        ``self.good_line_re`` is reduced to id, state, city, center
        coordinates and geometry.
        """
        if isfile(outfile):
            print("%s already exists, skipping formatting geofile" % outfile)
            return

        print("Building city shape files")

        city_shapefiles = loader.download_city_shapes()

        raw_geofile = '%s/raw_shapes_city.json' % self.raw_geo_dir
        self.converter.to_geojson(outfile=raw_geofile, shapefile_prefix='city', shapefiles_dir=city_shapefiles)

        # Format results

        # Captures, in order: state FIPS code, city name, center latitude,
        # center longitude and the geometry object.
        line_pattern = re.compile(
            '^\{.*?\{.*?\s*"STATEFP":\s*"([^"]+)".*?NAME":\s*"([^"]+)".*?"INTPTLAT":\s*"([^"]+)".*?INTPTLON":\s*"([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')
        signed_zero = re.compile('([+-])0')

        def clean_coordinate(raw_value):
            # Remove the first zero that directly follows a sign, then any '+'.
            return signed_zero.sub(r'\1', raw_value).replace('+', '')

        template = '{"id": "%(id)s", "state": "%(state)s", "city": "%(city)s", "center_lat": %(center_lat)s, "center_lon": %(center_lon)s, "geometry": %(coordinates)s}\n'

        with open(outfile, 'a') as out:
            print("Formatting %s into output file %s" % (raw_geofile, outfile))

            for line in fileinput.input(raw_geofile):
                if self.good_line_re.match(line) is None:
                    continue

                fields = line_pattern.match(line)
                state_code, city, raw_lat, raw_lon, coordinates = fields.groups()

                # Fall back to the raw code when it has no known state name.
                if state_code in state_codes:
                    state = state_codes[state_code]
                else:
                    state = state_code

                record = {
                    'id': sanitize("%s_%s" % (city, state)),
                    'city': city,
                    'state': state,
                    'center_lat': clean_coordinate(raw_lat),
                    'center_lon': clean_coordinate(raw_lon),
                    'coordinates': coordinates,
                }

                out.write(template % record)
コード例 #38
0
def main():
    """Offer to delete the file Rhythmbox is currently playing.

    After the user confirms, the player skips to the next track, the file
    is removed with ``rm`` and a notification is shown.
    """
    parse_arguments()

    # Look up the Rhythmbox player interface on the DBus session bus.
    player_proxy = dbus.SessionBus().get_object("org.gnome.Rhythmbox", "/org/gnome/Rhythmbox/Player")
    rhythmbox = dbus.Interface(player_proxy, "org.gnome.Rhythmbox.Player")

    playing_file = sanitize(rhythmbox.getPlayingUri())

    # Nothing is touched unless the user confirms the deletion.
    if not confirm(playing_file, title="Delete this file?"):
        return

    rhythmbox.next()
    call(["rm", playing_file])
    notify("Removed", "\"" + playing_file + "\"", icon="user-trash-full")
コード例 #39
0
ファイル: Packet.py プロジェクト: naveenr414/ingestion
    def load_html(self):
        """Read ``self.filename`` and return its usable content lines.

        Every line of the UTF-8 file is split on ``<br>`` tags, each piece is
        passed through ``sanitize`` and stripped, and only the pieces that
        ``is_valid_content`` accepts (given ``self.strippable_lines_res``)
        are kept.

        Returns:
            The list of sanitized, stripped lines in file order.
        """
        with codecs.open(self.filename, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        prepared_lines = []
        for raw_line in lines:
            # HACK to fix a common case of an answer/text not being on a new
            # line: insert our own line-break marker before it and let the
            # <br> split below do the rest.
            # ``flags`` has to be given by keyword: the fourth positional
            # parameter of re.sub is ``count``, so passing re.I there capped
            # the number of substitutions at 2 and left the match
            # case-sensitive.
            hacked_line = re.sub(r'(ANSWER|\[10\])', r'<br>\1', raw_line,
                                 flags=re.I)

            for piece in re.split(r'<br\s*/?>', hacked_line):
                sanitized_line = sanitize(piece).strip()
                if is_valid_content(
                        sanitized_line,
                        strippable_lines_res=self.strippable_lines_res):
                    prepared_lines.append(sanitized_line)
        return prepared_lines
コード例 #40
0
    def __init__(self, name, study, edit_media_uri=None, download_url=None):
        """Set up the file from an edit media uri or from a download url.

        With *edit_media_uri* the file is flagged as not released; its id is
        the second-to-last path segment of the uri and the download url is
        built from the uri's host. With only *download_url* the file is
        flagged as released and its id is the text after the last '='.

        Raises:
            utils.DataverseException: if neither uri is supplied.
        """
        self.name = utils.sanitize(name)
        self.study = study

        if not edit_media_uri and not download_url:
            raise utils.DataverseException(
                'Files must have an edit media uri or download url.'
            )

        if edit_media_uri:
            self.is_released = False
            self.edit_media_uri = edit_media_uri
            self.id = edit_media_uri.split('/')[-2]
            netloc = urlparse.urlparse(edit_media_uri).netloc
            self.download_url = 'http://{0}/dvn/FileDownload/?fileId={1}'.format(netloc, self.id)
            return

        self.is_released = True
        self.download_url = download_url
        self.id = download_url.rpartition('=')[2]
コード例 #41
0
ファイル: crawler.py プロジェクト: pombredanne/hitsearch
 def get_content(self, soup):
     """Count the words of the page and record its title.

     Comments and ``<script>`` tags are removed from *soup* first. The
     title falls back to ``self.url`` when the page title is empty, and
     the sanitized, lower-cased words of body and title are added to
     ``self.word_counts``.
     """
     # based on http://groups.google.com/group/beautifulsoup/browse_thread/thread/9f6278ee2a2e4564
     # drop comments
     for comment in soup(text=lambda text: isinstance(text, BeautifulSoup.Comment)):
         comment.extract()
     # drop javascript
     for script_tag in soup('script'):
         script_tag.extract()

     # count words!
     body_text = soup.body(text=True)
     title_text = soup.title(text=True)
     self.title = ' '.join(title_text).strip()
     if not self.title:
         self.title = self.url
     tokens = ' '.join(body_text).split() + ' '.join(title_text).split()
     self.word_counts += Counter(
         utils.sanitize(token).lower() for token in tokens if len(token) != 0)
コード例 #42
0
ファイル: formatter.py プロジェクト: knockrentals/us-shapes
    def format_state_shapes(self, outfile):
        """Append one formatted JSON document per state shape to *outfile*.

        Does nothing when *outfile* already exists. Otherwise the state
        shapefiles are downloaded, converted to a raw GeoJSON file under
        ``self.raw_geo_dir``, and every line accepted by
        ``self.good_line_re`` is reduced to id, state, postal code and
        geometry.
        """
        if isfile(outfile):
            print("%s already exists, skipping formatting geofile" % outfile)
            return

        print("Building state shape files")

        state_shapefiles = loader.download_state_shapes()

        raw_geofile = '%s/raw_shapes_state.json' % self.raw_geo_dir
        self.converter.to_geojson(outfile=raw_geofile, shapefile_prefix='state', shapefiles_dir=state_shapefiles)

        # Format results

        # Captures, in order: postal abbreviation, state name, geometry object.
        line_pattern = re.compile(
            '^\{.*?: \{.*?"STUSPS": "([^"]+)", "NAME": "([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')

        template = '{"id": "%(id)s", "state": "%(state)s", "postal": "%(postal)s", "geometry": %(coordinates)s}\n'

        with open(outfile, 'a') as out:
            print("Formatting %s into output file %s" % (raw_geofile, outfile))

            for line in fileinput.input(raw_geofile):
                if self.good_line_re.match(line) is None:
                    continue

                fields = line_pattern.match(line)
                postal, state, coordinates = fields.groups()

                record = {
                    'id': sanitize(state),
                    'state': state,
                    'postal': postal,
                    'coordinates': coordinates,
                }

                out.write(template % record)
コード例 #43
0
ファイル: builder.py プロジェクト: jayzeng/us-shapes
    def build_neighborhood_shapes(self, outfile, raw_geodir='raw_geoshapes',
                                  raw_geofile='raw_shapes_neighborhood.json'):
        """Append one formatted JSON document per neighborhood to *outfile*.

        The neighborhood shapefiles are downloaded and converted to GeoJSON
        (the converter returns the path of the file it wrote). Every line
        accepted by ``self.good_line_re`` is then reduced to id, state,
        city, neighborhood and geometry.
        """
        print("Building neighborhood shape files")

        neighborhood_shapefiles = download_neighborhood_shapes()
        geofile_path = self.converter.to_geojson(raw_geofile=raw_geofile, raw_geodir=raw_geodir,
                                                 shapefile_prefix='neighborhood',
                                                 shapefiles_dir=neighborhood_shapefiles)

        # Format results

        # Group 1 is the text before "STATE" and is not used; groups 2-5 are
        # state, city, neighborhood name and the geometry object.
        line_pattern = re.compile(
            '^(\{.*?)"STATE":\s*"([^"]+)".*?"CITY":\s*"([^"]+)".\s*"NAME":\s*"([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')

        template = '{"id": "%(id)s", "state": "%(state)s", "city": "%(city)s", "neighborhood": "%(neighborhood)s", "geometry": %(coordinates)s}\n'

        with open(outfile, 'a') as out:
            print("Formatting %s into output file %s" % (geofile_path, outfile))

            for line in fileinput.input(geofile_path):
                if self.good_line_re.match(line) is None:
                    continue

                fields = line_pattern.match(line)
                _, state, city, neighborhood, coordinates = fields.groups()

                record = dict(
                    id=sanitize("%s_%s_%s" % (neighborhood, city, state)),
                    neighborhood=neighborhood,
                    city=city,
                    state=state,
                    coordinates=coordinates,
                )

                out.write(template % record)
コード例 #44
0
def view_league_compare():
  """Render the league comparison page.

  On a POST request the submitted class is sanitized and used as the single
  query parameter; each result row is added to the template context as
  ``{'league': ..., 'val': ...}``. Other request methods render the page
  with an empty data list.
  """
  #keys(request.args) = ['attr', 'attr_val', 'entity', 'results']
  context = {'data': []}
  if request.method != "POST":
    return render_template('league_compare.html', **context)

  cls = utils.sanitize(request.form['cls'])
  print('cls = ' + cls)
  qrystr = """SELECT TopPlayers.league, AVG(PlaysFormat.damagepermin) 
FROM PlaysFormat, 
	(SELECT PlaysOn.player AS player,TopTeams.league AS league
	FROM PlaysOn, (
                SELECT TD.team AS team, TD.league AS league
                FROM TeamDivision as TD, LeagueDivision as LD
                WHERE LD.rank=1 AND TD.league=LD.league AND TD.division = LD.division) AS TopTeams
	WHERE PlaysOn.team=TopTeams.team) AS TopPlayers
WHERE PlaysFormat.player=TopPlayers.player AND PlaysFormat.class = %s AND PlaysFormat.damagePerMin IS NOT NULL
GROUP BY TopPlayers.league;"""
  print(qrystr)
  rows = g.conn.execute(qrystr, (cls))
  for row in rows:
    print(row)
    context['data'].append({'league':str(row[0]), 'val':str(row[1])})
  return render_template('league_compare.html', **context)
コード例 #45
0
async def send(self, message, args):
    """Send the image or text mapped to the first argument to discord.

    Returns silently when no argument is given or when the guild has no
    mapping for it. A mapped path ending in ``.txt`` is sent as message
    content; any other path is sent as a file.

    Arguments:
        self {discordClient} -- Needed
        message {discordMessage} -- The actual message that invoked this command
        args {list[str]} -- Everything that is after the command
    """

    guild_id = message.guild.id
    if not args:
        return

    mapping_name = utils.sanitize(args[0])
    rows = db.mappings_exists(guild_id, mapping_name)
    if not rows:
        return

    path = rows[0][1]
    extension = path.split(".")[-1]
    if extension != "txt":
        await disc.send_file(message, path)
    else:
        await message.channel.send(utils.get_content(path))
コード例 #46
0
def plot_boundary(X, y, coefs, title='', colormap=False, save=False):
    """Plot two classes of 2-D points with the conic decision boundary.

    Args:
        X: array indexed as ``X[:, 0]`` / ``X[:, 1]``; the first two columns
            are used as the plot coordinates.
        y: labels used as masks ``y == 0`` and ``y == 1`` to select the rows
            of ``X`` belonging to each class.
        coefs: coefficients handed unchanged to ``conics``.
        title: figure title; its sanitized form is also the file name used
            when ``save`` is true.
        colormap: when true, draw the values of ``conics`` over the mesh as
            an image underneath the scatter plot.
        save: when true, write the figure to ``results/<sanitized title>``.
    """
    x_min, x_max = np.min(X[:, 0]), np.max(X[:, 0])
    y_min, y_max = np.min(X[:, 1]), np.max(X[:, 1])
    offset = 1

    # Evaluate the conic on a q x q mesh that extends `offset` past the data.
    q = 500
    tx = np.linspace(x_min - offset, x_max + offset, num=q)
    ty = np.linspace(y_min - offset, y_max + offset, num=q)
    X_mesh, Y_mesh = np.meshgrid(tx, ty)
    Z = conics(X_mesh, Y_mesh, coefs)

    # The figure is freshly created here, so there is nothing to clear; the
    # previous bare `plt.clf` (without parentheses) never called anything.
    plt.figure(figsize=(9, 9))
    if colormap:
        plt.imshow(Z,
                   origin="lower",
                   extent=[
                       x_min - offset, x_max + offset, y_min - offset,
                       y_max + offset
                   ],
                   aspect=(x_max - x_min) / (y_max - y_min))
    # The decision boundary is the zero level set of Z. A list requests
    # exactly that level; an integer `levels` is only a hint for how many
    # automatically chosen levels to draw.
    contours = plt.contour(X_mesh, Y_mesh, Z, levels=[0], colors='g')
    contours.collections[0].set_label('Decision boundary')
    plt.scatter(X[y == 0, 0], X[y == 0, 1], label='class 0')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], label='class 1')

    plt.xlim(x_min - offset, x_max + offset)
    plt.ylim(y_min - offset, y_max + offset)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.legend(loc="upper right", fontsize=16)
    if save:
        print("Save figure results folder.")
        plt.savefig('results/{}'.format(utils.sanitize(title)))
    plt.show()
コード例 #47
0
ファイル: items.py プロジェクト: fmartingr/minecraftcodex
for java_file in conf.ITEMS_FILES:
    file_handler = open('./classes/%s' % java_file)
    data = file_handler.read().split("\n")

    item_regex = re.compile(conf.ITEMS_PATTERN)
    class_error_regex = re.compile('name \'(?P<name>\w+)\' is not defined')

    for line in data:
        if '"' in line:
            t = item_regex.search(line)
            if t:
                item = t.groupdict()
                if conf.DEBUG:
                    print("Line: " + item['code'])

                item['code'] = utils.sanitize(item['code'])

                if conf.DEBUG:
                    print("Sanitize: " + item['code'])

                try:
                    obj = eval(item['code'])
                except NameError as error:
                    # Create class for the given classname
                    class_name = class_error_regex.search(error.__str__()).group('name')
                    if conf.DEBUG:
                        print("Classname: %s" % class_name)
                    setattr(sys.modules[__name__], class_name, type(class_name, (GameItem,), {}))
                    obj = eval(item['code'])
                #if obj.name == 'appleGold':
                if conf.DEBUG:
コード例 #48
0
def _handle_post_annotation_request(user, document, request):
    """Create an annotation from the POST data and notify interested users.

    Unauthenticated users get a 403 response. Otherwise the annotation is
    saved, the document owner and every collector other than the annotator
    and the owner are notified, and a JSON response carrying the rendered
    annotation HTML, its id, uuid and the annotation itself is returned.
    """
    if not user.is_authenticated:
        return HttpResponseForbidden()

    post = request.POST
    annotation = Annotation()
    annotation.annotator = user
    annotation.content = sanitize(post['annotation_content'])
    annotation.document_this_annotation_belongs = document
    annotation.page_index = post["page_id"].split("_")[2]
    annotation.height_percent = post["height_percent"]
    annotation.width_percent = post["width_percent"]
    annotation.top_percent = post["top_percent"]
    annotation.left_percent = post["left_percent"]
    annotation.frame_color = post["frame_color"]
    annotation.is_public = post["is_public"] == 'true'
    annotation.save()

    def send_notification(recipient):
        # The keyword values are computed at call time, once per notification.
        notify.send(
            sender=annotation.annotator,
            recipient=recipient,
            action_object=annotation,
            verb='post annotation',
            redirect_url=annotation.url,
            image_url=annotation.annotator.portrait_url,
            description=h.handle(annotation.content),
            is_public=annotation.is_public,
        )

    # send notification to the document uploader
    if annotation.annotator.pk != document.owner.pk:
        send_notification(document.owner)

    # send notification to the collectors, i.e., followers
    for collector in document.collectors.all():
        if annotation.annotator.pk != collector.pk and document.owner.pk != collector.pk:
            send_notification(collector)

    context = {
        "document": document,
        'annotation': annotation,
        'ANONYMOUS_USER_PORTRAIT_URL': settings.ANONYMOUS_USER_PORTRAIT_URL,
        "new_annotation_id": annotation.id,
    }
    annotation_html = render(request, "file_viewer/one_annotation_div.html",
                             context).content
    payload = {
        'new_annotationdiv_html': annotation_html,
        'new_annotation_id': annotation.id,
        'new_annotation_uuid': str(annotation.clean_uuid),
        'new_annotation_json': annotation,
    }
    return JsonResponse(payload, encoder=AnnotationEncoder)
コード例 #49
0
ファイル: bioplot.py プロジェクト: josbouten/bioplot
# Use default or user provided maximum sample numbers.
# Element 0 of restrictedNrSubjectSamples is the target limit, element 1 the
# non-target limit; both are converted to int.
maxNrTargetSamplesPerLabel = int(args.restrictedNrSubjectSamples[0])
maxNrNonTargetSamplesPerLabel = int(args.restrictedNrSubjectSamples[1])

# Handle any request for the license first.
# The program stops right after showing it.
if args.showLicense:
    l = License('LICENSE.txt')
    l.showLicense()
    exit(0)

# Name of the experiment, used as _title in plots.
expName = args.expName

# We do not like spaces!
# NOTE(review): presumably sanitize() deals with spaces in the file names;
# confirm against its definition.
filenames = sanitize(args.filenames)
# filename = args.filename
dataType = args.dataType

# Threshold used by the biometric system to make a decision;
# only of interest if you want to plot the system's accuracy.
threshold = args.threshold

# Remaining settings come from the configuration file named on the command line.
config = Config(args.configFilename)

debug = config.getDebug()

# EER / Cllr state: starts as disabled flags and empty objects.
compute_eer = False
compute_cllr = False
eerObject = None
cllrObject = None
コード例 #50
0
 def get_query(self):
     """Return ``sanitize`` applied to ``bvecs_mmap(self.querydir)``."""
     queries = bvecs_mmap(self.querydir)
     return sanitize(queries)
コード例 #51
0
 def get_train(self):
     """Return ``sanitize`` applied to ``bvecs_mmap(self.traindir)``."""
     training = bvecs_mmap(self.traindir)
     return sanitize(training)
コード例 #52
0
 def get_base_iterator(self):
     """Yield the sanitized ``fvecs_mmap`` result of each base file.

     File names come from formatting ``self.basedir`` with the indices
     ``0 .. self.num - 1``, in that order.
     """
     index = 0
     while index < self.num:
         base_path = self.basedir.format(index)
         yield sanitize(fvecs_mmap(base_path))
         index += 1
コード例 #53
0
def _handle_post_annotation_reply_request(user, document, request):
    """Create a reply to an annotation and notify the people replied to.

    An empty reply returns a plain 200 response and an unauthenticated user
    a 403. Otherwise the reply is built from the POST data, the author of
    the reply being answered (if any) and the annotation's author are
    notified, and a JSON response with the rendered reply HTML and the
    reply itself is returned.
    """
    post = request.POST
    if post["annotation_reply_content"] == "":
        return HttpResponse(status=200)
    if not user.is_authenticated:
        return HttpResponseForbidden()

    reply = AnnotationReply()
    annotation = Annotation.objects.get(id=int(post["reply_to_annotation_id"]))
    reply.content = sanitize(post["annotation_reply_content"])
    reply.replier = user
    reply.reply_to_annotation = annotation
    reply.is_public = post["is_public"] == 'true'

    annotation_poster = reply.reply_to_annotation.annotator

    def send_notification(recipient, verb):
        # The keyword values are computed at call time, once per notification.
        notify.send(
            sender=reply.replier,
            recipient=recipient,
            action_object=reply,
            verb=verb,
            redirect_url=annotation.url,
            image_url=reply.replier.portrait_url,
            description=h.handle(reply.content),
            is_public=reply.is_public,
        )

    if "reply_to_annotation_reply_id" in post:
        reply.reply_to_annotation_reply = AnnotationReply.objects.get(
            id=int(post["reply_to_annotation_reply_id"]))
        # Avoid two duplicate notifications when the annotation and the
        # reply being answered were written by the same person.
        parent_replier = reply.reply_to_annotation_reply.replier
        if parent_replier.pk != annotation_poster.pk and user.pk != parent_replier.pk:
            # NOTE(review): this notification goes out before reply.save()
            # below - confirm that ordering is intended.
            send_notification(reply.reply_to_annotation_reply.replier,
                              'reply to annotation reply')

    reply.save()
    if user.pk != annotation_poster.pk:
        send_notification(reply.reply_to_annotation.annotator,
                          'reply to annotation')

    context = {
        "annotation_reply": reply,
        'ANONYMOUS_USER_PORTRAIT_URL': settings.ANONYMOUS_USER_PORTRAIT_URL,
    }
    reply_html = render(request, "file_viewer/one_annotation_reply.html",
                        context).content
    payload = {
        'new_annotationreply_html': reply_html,
        'new_annotationreply_json': reply,
    }
    return JsonResponse(payload, encoder=AnnotationReplyEncoder)
コード例 #54
0
ファイル: Packet.py プロジェクト: naveenr414/ingestion
    def parse_packet(self):
        """Split the packet's prepared lines into tossups and bonuses.

        The lines from ``self.load_html()`` are walked once. Parsing starts
        in tossup mode and switches to bonus mode either when a line consists
        only of "bonus"/"bonuses" or when the expected number of tossups has
        been reached and a bonus leadin is seen. Question text is taken from
        the original line; the sanitized copy is only used for matching.

        Returns:
            A ``(tossups, bonuses)`` tuple. The same lists are stored on
            ``self.tossups`` and ``self.bonuses``, and every question gets
            ``self.tournament`` and ``self.round`` assigned.
        """
        lines = self.load_html()

        tossups = []
        current_tossup = Tossup(1)
        # assume we start with tossups first, since that's what almost
        # every packet structure looks like
        parsing_tossups = True

        bonuses = []
        current_bonus = Bonus(1)

        for line in lines:
            sanitized_line = sanitize(reformat_line(line))

            # edge case for switching from tossups to bonuses
            # we use -1 as a short circuit to say "use the Bonuses marker instead"
            if (self.num_tossups != -1 and len(tossups) + 1 >= self.num_tossups
                    and self.bonus_leadin_re.search(sanitized_line)
                    and current_tossup.has_content()):
                parsing_tossups = False

            if parsing_tossups:
                if self.tossup_text_re.search(sanitized_line):
                    # assume finding a new tossup means we're done with the old one
                    if current_tossup.has_content():
                        tossups.append(current_tossup)
                    current_tossup = Tossup(len(tossups) + 1)
                    current_tossup.text = self.tossup_text_re.sub("",
                                                                  line,
                                                                  count=1)
                    current_tossup.answer = ""
                # TODO: handle case where the next answer line isn't actually on the next line *sigh*
                elif self.tossup_answer_re.search(sanitized_line):
                    current_tossup.answer = self.tossup_answer_re.sub("",
                                                                      line,
                                                                      count=1)
                else:
                    if re.search(r'^bonus(es)?$', sanitized_line, re.I):
                        parsing_tossups = False
                        # Skip the marker line itself. This used to be a bare
                        # `next`, which does nothing in Python, so the marker
                        # text was appended to the current tossup below.
                        continue
                    # this assumes everything between the current answer and the beginning of the
                    # next tossup is worthwhile answer text
                    # this will catch some junk in the middle occasionally, but we should
                    # handle that by stripping it in the preparation stuff, and this approach
                    # allows us to handle bad packet loading that turns paragraphs into multilines
                    if current_tossup.has_content():
                        if current_tossup.answer != "":
                            current_tossup.answer += (" " + line)
                        else:
                            current_tossup.text += (" " + line)

            else:
                if self.bonus_leadin_re.search(sanitized_line):
                    # The first bonus leadin also closes the last open tossup.
                    if len(bonuses) == 0 and current_tossup.has_content():
                        tossups.append(current_tossup)
                        current_tossup = Tossup(len(tossups) + 1)
                    if current_bonus.has_content():
                        bonuses.append(current_bonus)
                    current_bonus = Bonus(len(bonuses) + 1,
                                          leadin=self.bonus_leadin_re.sub(
                                              "", line, count=1))
                elif self.bonuspart_text_re.search(sanitized_line):
                    current_bonus.texts += [
                        self.bonuspart_text_re.sub("", line, count=1)
                    ]
                elif self.bonuspart_answer_re.search(sanitized_line):
                    current_bonus.answers += [
                        self.bonuspart_answer_re.sub("", line, count=1)
                    ]
                else:
                    # Continuation line: attach it to whichever piece of the
                    # bonus is currently open (leadin, part text or answer).
                    if current_bonus.texts == [] and current_bonus.answers == []:
                        current_bonus.leadin += (" " + line)
                    elif len(current_bonus.texts) > len(current_bonus.answers):
                        current_bonus.texts[-1] += (" " + line)
                    else:
                        current_bonus.answers[-1] += (" " + line)

        # Flush whatever question was still being built when the input ended.
        if current_tossup.has_content():
            tossups.append(current_tossup)
        if current_bonus.has_content():
            bonuses.append(current_bonus)

        for tossup in tossups:
            tossup.tournament = self.tournament
            tossup.round = self.round
        for bonus in bonuses:
            bonus.tournament = self.tournament
            bonus.round = self.round

        self.tossups = tossups
        self.bonuses = bonuses
        return (tossups, bonuses)
コード例 #55
0
 def title(self):
     """Return the collection's title passed through ``utils.sanitize``."""
     raw_title = self.collection.title
     return utils.sanitize(raw_title)
コード例 #56
0
 def _format_audio_name(audio_info: Dict):
     """Build the '<artist> - <title>.mp3' file name for *audio_info*.

     The '<artist> - <title>' part is passed through ``sanitize`` with
     lower-casing and the alphanumeric-only restriction both switched off.
     """
     display_name = '{0[artist]} - {0[title]}'.format(audio_info)
     safe_name = sanitize(display_name, to_lower=False, alpha_numeric_only=False)
     return '{}.mp3'.format(safe_name)
コード例 #57
0
    df_perturb = utils.load_dataframe(args.perturb_data)

    y_train, classes = utils.load_labels(args.train_labels)
    y_perturb, _ = utils.load_labels(args.perturb_labels, classes)

    print('loaded train data (%s genes, %s samples)' % (df_train.shape[1], df_train.shape[0]))
    print('loaded perturb data (%s genes, %s samples)' % (df_perturb.shape[1], df_perturb.shape[0]))

    # impute missing values
    min_value = df_train.min().min()

    df_train.fillna(value=min_value, inplace=True)
    df_perturb.fillna(value=min_value, inplace=True)

    # sanitize class names
    classes = [utils.sanitize(c) for c in classes]

    # determine target class
    try:
        if args.target == None:
            args.target = -1
        else:
            args.target = classes.index(args.target)
            print('target class is: %s' % (classes[args.target]))
    except ValueError:
        print('error: class %s not found in dataset' % (args.target))
        sys.exit(1)

    # load gene sets file if it was provided
    if args.gene_sets != None:
        print('loading gene sets...')
コード例 #58
0
                if 'github' in l and 'bertalan' in l
            ]
            if len(remotes) > 0:
                repo = 'http://github.com/' + remotes[0].split()[1][15:][:-4]
        except CalledProcessError:
            pass

        # Generate HTML for the page contents.
        readmeHtml = markdown_to_html(description)

        # Assign urls to the entry pages.
        for k, entry in enumerate(entries):
            entry['url'] = (
                # 'entry_%d-' % k
                # +
                utils.sanitize(entry['title'].replace(' ', '_'), extra='_') +
                '.html')
            entry['save_path'] = join(wwwDir, baseProjectName, entry['url'])

        # Generate and write the project page.
        breadcrumbs = [
            utils.Tag('a', href='../index.html', tagText='Home'),
            utils.Tag('span', tagText=baseProjectName, parseTagText=False)
        ]

        home_title_link = utils.Tag('a',
                                    tagText='Tom Bertalan',
                                    href='../index.html',
                                    cls='mdl-typography--headline',
                                    style='text-decoration:none; color:#444;')
コード例 #59
0
ファイル: candidates.py プロジェクト: winnersky/crawlers
 def parse_member_party(self, member):
     member['party'] = sanitize(member['party'][0])