def twitterBOT(api, screen_name, min_RT_count, min_fav_count, hashtags, keywords):
    """Search tweets by keyword, follow their authors, and retweet/favorite popular ones.

    For every keyword, the search results accepted by ``in24Hours`` are
    collected. The author of each collected tweet is then followed and
    recorded in the ``twitter`` table, and tweets whose text contains one of
    ``hashtags`` are retweeted / favorited when their counts exceed the given
    thresholds.

    Relies on the module-level names ``c`` and ``conn`` (presumably a DB-API
    cursor and connection -- verify), ``in24Hours``, ``unidecode`` and
    ``time``. This block is written with Python 2 ``print`` statements.

    Args:
        api: Object exposing ``search(q=..., since_id=...)`` (presumably a
            tweepy API instance -- TODO confirm).
        screen_name: Not used anywhere in this function.
        min_RT_count: A matching tweet is retweeted only when its retweet
            count is strictly greater than this value (both sides go
            through ``int()``).
        min_fav_count: Same rule, applied to the favorite count.
        hashtags: Iterable of strings matched case-insensitively as
            substrings of the tweet text.
        keywords: Iterable of search queries.
    """
    
    #initialize a list to hold all the tweepy Tweets
    alltweets = []

    for keyword in keywords:    
    # search for a specific keyword
        firstTime = True
        while True:
            tweets_within_24_hours = []
            if firstTime:
                new_tweets = api.search(q=keyword)
            else:
                # NOTE(review): `oldest` is the id of the last tweet of the
                # previous page but is passed as `since_id`; confirm whether
                # `max_id` was intended for paging backwards.
                new_tweets = api.search(q=keyword, since_id = oldest)
                
            # keep only the tweets that in24Hours() accepts
            for t in new_tweets:
                if in24Hours(t.created_at):
                    tweets_within_24_hours.append(t)
                    print unidecode(t.text)

            #save most recent tweets
            alltweets.extend(tweets_within_24_hours)

            # a page with fewer than 200 results ends the paging for this keyword
            if len(new_tweets) < 200:
                break
            else:
                firstTime = False
                oldest = new_tweets[-1].id

    for t in alltweets:
        # the author is followed unconditionally, before the database lookup
        t.user.follow()
        start = time.time()
        name = t.user.name
        userExists = 0
        try:
            # NOTE(review): the query is built by string concatenation from
            # the user's display name; a parameterized query would be safer.
            sql1 = 'select name, time_followed, following from twitter where name = "' + str(name) + '"'
            c.execute(sql1)
            # NOTE(review): `fetchone` is called as a bare name rather than as
            # `c.fetchone()`. Unless the module defines `fetchone`, this raises
            # NameError, which the bare `except` below swallows, leaving
            # `userExists` at 0 and skipping the unfollow check -- verify.
            rows = fetchone()
            userExists = 1
            time_followed = float(rows[1])
            # 172800 seconds = 48 hours
            if start - time_followed > 172800:
                t.user.unfollow()
        except:
            # every error (including the ones noted above) is silently ignored
            pass

        if not userExists:
            # record the follow time; the stored name is transliterated with unidecode
            sql = 'insert into twitter (name, time_followed, following) values ( "' + \
                  unidecode(name) + '", ' +\
                  str(start) + ', 1 );'
            c.execute(sql)
            conn.commit()
        # engage only with tweets whose text contains one of the hashtags
        if any(word.lower() in t.text.lower() for word in hashtags):
            if int(t.retweet_count)> int(min_RT_count):
                print 'int(t.retweet_count)', int(t.retweet_count)
                t.retweet()
            if int(t.favorite_count) > int(min_fav_count):
                print 'int(t.favorite_count)', int(t.favorite_count)
                t.favorite()
예제 #2
0
    def add_payment(self, payment):
        """
        Validate a payment and add it to the document being built.

        When ``self.clean`` is set, the payment's name and description are
        transliterated with unidecode and truncated (70 and 140 characters)
        in place on the given dict.

        @param payment: The payment dict
        @raise exception: when payment is invalid
        """
        self.check_payment(payment)

        if self.clean:
            from text_unidecode import unidecode

            for field, max_length in (('name', 70), ('description', 140)):
                payment[field] = unidecode(payment[field])[:max_length]

        # Non-batch payments each get their own payment-information node set.
        if not self._config['batch']:
            pmt_inf = self._create_PmtInf_node()
            pmt_inf['PmtInfIdNode'].text = make_id(self._config['name'])
            pmt_inf['PmtMtdNode'].text = "TRF"
            pmt_inf['BtchBookgNode'].text = "false"
            pmt_inf['NbOfTxsNode'].text = "1"
            pmt_inf['CtrlSumNode'].text = int_to_decimal_str(payment['amount'])
            pmt_inf['Cd_SvcLvl_Node'].text = "SEPA"
            # The execution-date node is dropped when no date was supplied.
            if 'execution_date' not in payment:
                del pmt_inf['ReqdExctnDtNode']
            else:
                pmt_inf['ReqdExctnDtNode'].text = payment['execution_date']

            pmt_inf['Nm_Dbtr_Node'].text = self._config['name']
            pmt_inf['IBAN_DbtrAcct_Node'].text = self._config['IBAN']
            if 'BIC' in self._config:
                pmt_inf['BIC_DbtrAgt_Node'].text = self._config['BIC']

            pmt_inf['ChrgBrNode'].text = "SLEV"

        # The creditor-agent BIC node is only created when the payment has a BIC.
        has_bic = 'BIC' in payment

        tx = self._create_TX_node(has_bic)
        tx['InstdAmtNode'].set("Ccy", self._config['currency'])
        tx['InstdAmtNode'].text = int_to_decimal_str(payment['amount'])
        tx['EndToEnd_PmtId_Node'].text = payment.get('endtoend_id', 'NOTPROVIDED')
        if has_bic:
            tx['BIC_CdtrAgt_Node'].text = payment['BIC']
        tx['Nm_Cdtr_Node'].text = payment['name']
        tx['IBAN_CdtrAcct_Node'].text = payment['IBAN']
        tx['UstrdNode'].text = payment['description']

        if not self._config['batch']:
            self._add_non_batch(tx, pmt_inf)
        else:
            self._add_batch(tx, payment)
예제 #3
0
파일: models.py 프로젝트: lauseb/cooking
    def add_tags(self):
        """Rebuild ``self.tags`` from the words of the name and the ingredient names."""
        self.tags.clear()

        # Candidate tag names are lower-cased and then transliterated.
        wanted = {unidecode(word.lower()) for word in self.name.split()}
        wanted.update(unidecode(quantity.ingredient.name.lower())
                      for quantity in self.quantities)

        for label in wanted:
            # Reuse a stored tag with this name when there is one.
            found = Tag.query.filter_by(name=label).first()
            self.tags.append(found if found is not None else Tag(name=label))
예제 #4
0
    def verifica_estrutura_cabecalho(cls, cabecalho):
        """Check the file header against the expected column titles.

        Titles are compared after transliteration with unidecode. Returns
        True when every column matches; raises CargaAssociacaoException on
        the first column whose title differs.
        """
        for coluna, nome in cls.__CABECALHOS.items():
            if unidecode(cabecalho[coluna]) == unidecode(nome):
                continue
            raise CargaAssociacaoException(
                f'Título da coluna {coluna} errado. Encontrado "{cabecalho[coluna]}". '
                f'Deveria ser "{nome}". Confira o arquivo com o modelo.')

        return True
예제 #5
0
    def fingerprint(self):
        """Replace ``self.key`` with the fingerprint generated from it.

        A key that is not a ``str`` is first passed through unidecode.
        """
        original_key = self.key
        text = original_key if isinstance(original_key, str) else unidecode(original_key)
        generated = fingerprints.generate(text)

        if TRACE_TEXT or TRACE_FP:
            # Tracing logs the untouched key and a fingerprint recomputed from it.
            logger_debug('Text.fingerprint:key: ', repr(self.key))
            logger_debug('Text.fingerprint:fp :    ',
                         fingerprints.generate(unidecode(self.key)))

        self.key = generated
예제 #6
0
def getElement(elements, name):
    """Extract a value from table rows of a publication page.

    Each item of ``elements`` is searched for a ``th`` cell whose
    transliterated, stripped text equals ``name``. For the first match the
    function returns the ``href`` of the link inside the ``td`` cell when
    ``name`` is ``'DOI:'``, and the stripped text of the ``td`` cell
    otherwise.

    Returns ``None`` when nothing matches or when anything goes wrong while
    reading the rows (best-effort lookup).
    """
    try:
        # Loop-invariant pieces of the comparison are computed once.
        wanted = unidecode(name).strip()
        wants_link = name.strip() == 'DOI:'
        for row in elements:
            if unidecode(row.find('th').getText()).strip() != wanted:
                continue
            cell = row.find('td')
            if wants_link:
                ## if DOI is to be extracted, we need the pdf link
                return cell.find('a')['href']
            ## for all other elements, text is required
            return cell.getText().strip()
    except Exception:
        # Best-effort lookup: malformed rows yield None, but unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        return None
    return None
 def cabecalho_correto(cls, cabecalho):
     """Report whether the file header matches the expected column titles.

     Titles are compared after transliteration with unidecode. On the first
     mismatch the error is logged, appended to ``cls.logs`` and False is
     returned; True is returned when every column matches.
     """
     for coluna, nome in cls.__CABECALHOS.items():
         if unidecode(cabecalho[coluna]) == unidecode(nome):
             continue
         msg_erro = (
             f'Título da coluna {coluna} errado. Encontrado "{cabecalho[coluna]}". '
             f'Deveria ser "{nome}". Confira o arquivo com o modelo.')
         logger.error(msg_erro)
         cls.logs = f"{cls.logs}\n{msg_erro}"
         return False
     return True
예제 #8
0
def test_recipe_add_tags(set_db):
    """Two recipes sharing an ingredient share its tag; other tags stay separate."""
    #TODO warning here when we add "eau" for the second time,
    # when adding the quantity in add_ingredients. Try to fix that ?
    ingredients_eausel = [
        {"name": "eau", "quantity": 1, "unit": "L"},
        {"name": "sel", "quantity": 10, "unit": "g"},
    ]
    ingredients_eaupoivre = [
        {"name": "eau", "quantity": 2, "unit": "mL"},
        {"name": "poivre", "quantity": 10, "unit": "g"},
    ]

    def stored_recipe(recipe_name, ingredients):
        # Build, tag and commit one recipe, in that order.
        recipe = Recipe(name=recipe_name)
        recipe.add_ingredients(ingredients)
        recipe.add_tags()
        db.session.add(recipe)
        db.session.commit()
        return recipe

    recipe_eausel = stored_recipe("eau salée", ingredients_eausel)
    recipe_eaupoivre = stored_recipe("eau poivrée", ingredients_eaupoivre)

    # Expected tags: every ingredient name plus every word of both recipe names.
    expected_tags = set()
    for ingredient in chain(ingredients_eausel, ingredients_eaupoivre):
        expected_tags.add(unidecode(ingredient["name"]).lower())
    for recipe in (recipe_eausel, recipe_eaupoivre):
        expected_tags.update(
            unidecode(elem).lower() for elem in recipe.name.split())

    assert expected_tags == set(tag.name for tag in Tag.query)
    for tag_name, recipe_count in (("eau", 2), ("sel", 1), ("salee", 1)):
        assert len(Tag.query.filter_by(name=tag_name).first().recipes) == recipe_count
예제 #9
0
def parse(element_html, data):
    """Validate the submitted file and register it under ``_files``.

    Reads the ``file-name`` and ``normalize-to-ascii`` attributes of the
    element. A missing submission is reported as a format error. When
    normalization is requested, the base64 payload is decoded as UTF-8,
    transliterated with unidecode, stripped and re-encoded. The file is then
    appended to ``data['submitted_answers']['_files']``.
    """
    element = lxml.html.fragment_fromstring(element_html)
    file_name = pl.get_string_attrib(element, 'file-name', '')
    answer_name = get_answer_name(file_name)
    normalize_to_ascii = pl.get_boolean_attrib(element, 'normalize-to-ascii', NORMALIZE_TO_ASCII_DEFAULT)

    # Get submitted answer or return parse_error if it does not exist
    file_contents = data['submitted_answers'].get(answer_name, None)
    if not file_contents:
        add_format_error(data, 'No submitted answer for {0}'.format(file_name))
        return

    if normalize_to_ascii:
        try:
            raw_text = base64.b64decode(file_contents).decode('utf-8')
            ascii_bytes = unidecode(raw_text).encode('UTF-8').strip()
            file_contents = base64.b64encode(ascii_bytes).decode()
            data['submitted_answers'][answer_name] = file_contents
        except UnicodeError:
            # The error is recorded; the unmodified payload is still registered below.
            add_format_error(data, 'Submitted answer is not a valid UTF-8 string.')

    current_files = data['submitted_answers'].get('_files', None)
    if current_files is not None and not isinstance(current_files, list):
        add_format_error(data, '_files was present but was not an array.')
        return

    if current_files is None:
        data['submitted_answers']['_files'] = []
    data['submitted_answers']['_files'].append({
        'name': file_name,
        'contents': file_contents
    })
예제 #10
0
    def __init__(self, config, schema, clean=True):
        """
        Constructor. Checks the config, prepares the document and
        builds the header.
        @param config: The config dict. When ``clean`` is set, its 'name'
            entry is transliterated and truncated to 70 characters in place.
        @param schema: Stored as ``self.schema``.
        @param clean: Whether text fields are passed through unidecode.
        @raise exception: When the config file is invalid.
        """
        # Holds the config dict once it has been validated.
        self._config = None
        # Holds the final XML document.
        self._xml = None
        # SEPA batches, and the per-batch total used for the checksum total.
        self._batches = OrderedDict()
        self._batch_totals = OrderedDict()
        self.schema = schema
        self.msg_id = make_msg_id()
        self.clean = clean

        if self.check_config(config):
            self._config = config
            if self.clean:
                from text_unidecode import unidecode

                cleaned_name = unidecode(self._config['name'])
                self._config['name'] = cleaned_name[:70]

        self._prepare_document()
        self._create_header()
예제 #11
0
    def save(self):
        """Create or update a position from the validated data.

        When ``self.position_id`` is set the existing row is loaded (a
        ValidationError is raised if it does not exist); otherwise a new
        ``Positions`` instance is created. Fields missing from the validated
        data keep the model's current values. The slug is derived from the
        Russian name; if a *different* row already uses that slug, a timestamp
        is appended to keep it unique.

        Returns the saved model instance.
        """
        if self.position_id:
            try:
                position_model = Positions.objects.get(id=self.position_id)
            except Positions.DoesNotExist:
                raise serializers.ValidationError(
                    {'category': ['position not found']})
        else:
            position_model = Positions()

        category_id = self.validated_data.get('category',
                                              position_model.category_id)
        name = self.validated_data.get('name', position_model.name)
        is_active = self.validated_data.get('is_active',
                                            position_model.is_active)
        slug = slugify(unidecode(name['ru']))

        # Exclude the row being edited from the collision check; otherwise
        # re-saving an unchanged position would collide with itself and get a
        # new timestamped slug on every update.
        slug_clashes = Positions.objects.filter(slug__iexact=slug)
        if position_model.pk is not None:
            slug_clashes = slug_clashes.exclude(pk=position_model.pk)
        if slug_clashes.exists():
            slug = f"{slug}-{datetime.now().timestamp()}"

        position_model.category_id = category_id
        position_model.name = name
        position_model.is_active = is_active
        position_model.slug = slug
        position_model.save()
        # Validation runs after the save and failures are only printed.
        try:
            position_model.clean()
        except Exception as e:
            print('error')
            print(str(e))
        return position_model
예제 #12
0
    def save(self):
        """Create or update a category from the validated data.

        When ``self.category_id`` is set the existing row is loaded (a
        ValidationError is raised if it does not exist); otherwise a new
        ``Category`` instance is created. Fields missing from the validated
        data keep the model's current values. The slug is derived from the
        Russian name; if a *different* row already uses that slug, a timestamp
        is appended to keep it unique.

        Returns the saved model instance.
        """
        if self.category_id:
            try:
                category_model = Category.objects.get(id=self.category_id)
            except Category.DoesNotExist:
                # The previous message ('category found region') was garbled.
                raise serializers.ValidationError(
                    {'category': ['category not found']})
        else:
            category_model = Category()

        parent_id = self.validated_data.get('parent_id',
                                            category_model.parent_id)
        name = self.validated_data.get('name', category_model.name)
        is_active = self.validated_data.get('is_active',
                                            category_model.is_active)
        is_main = self.validated_data.get('is_main', category_model.is_main)
        sort_order = self.validated_data.get('sort_order',
                                             category_model.sort_order)
        slug = slugify(unidecode(name['ru']))

        # Exclude the row being edited from the collision check; otherwise
        # re-saving an unchanged category would collide with itself and get a
        # new timestamped slug on every update.
        slug_clashes = Category.objects.filter(slug__iexact=slug)
        if category_model.pk is not None:
            slug_clashes = slug_clashes.exclude(pk=category_model.pk)
        if slug_clashes.exists():
            slug = f"{slug}-{datetime.now().timestamp()}"

        category_model.parent_id = parent_id
        category_model.name = name
        category_model.is_active = is_active
        category_model.is_main = is_main
        category_model.sort_order = sort_order
        category_model.slug = slug
        category_model.save()

        return category_model
예제 #13
0
def parse_namespace_repository(
    repository, library_namespace, include_tag=False, allow_library=True
):
    """Split ``[namespace/]repository[:tag]`` into its parts.

    The input is transliterated with unidecode and trailing slashes are
    removed. Without a namespace, ``library_namespace`` is used, unless
    ``allow_library`` is false, in which case
    ImplicitLibraryNamespaceNotAllowed is raised. With ``include_tag`` the
    tag defaults to "latest". The repository part is URL-quoted.

    Returns ``(namespace, repository)`` or, with ``include_tag``,
    ``(namespace, repository, tag)``.
    """
    repository = unidecode(repository)

    head, slash, tail = repository.rstrip("/").partition("/")
    if slash:
        namespace, repository = head, tail
    else:
        # No explicit namespace: fall back to the library namespace if allowed.
        if not allow_library:
            raise ImplicitLibraryNamespaceNotAllowed()
        namespace, repository = library_namespace, head

    if include_tag:
        name_part, colon, tag_part = repository.partition(":")
        if colon:
            repository, tag = name_part, tag_part
        else:
            tag = "latest"

    repository = urllib.parse.quote_plus(repository)
    if not include_tag:
        return (namespace, repository)
    return (namespace, repository, tag)
예제 #14
0
def parse(element_html, data):
    """Validate and store the submitted answer for this element.

    A missing answer is recorded as a format error. With
    ``normalize-to-ascii`` the answer is transliterated with unidecode. A
    blank answer is a format error unless ``allow-blank`` is set; otherwise
    the answer is stored via ``pl.to_json``.
    """
    element = lxml.html.fragment_fromstring(element_html)
    name = pl.get_string_attrib(element, 'answers-name')
    # NOTE(review): allow-blank is read with get_string_attrib while the other
    # flag uses get_boolean_attrib; a string such as "false" would be truthy
    # below -- confirm this is intended.
    allow_blank = pl.get_string_attrib(element, 'allow-blank',
                                       ALLOW_BLANK_DEFAULT)
    normalize_to_ascii = pl.get_boolean_attrib(element, 'normalize-to-ascii',
                                               NORMALIZE_TO_ASCII_DEFAULT)

    submitted = data['submitted_answers']

    # Get submitted answer or return parse_error if it does not exist
    a_sub = submitted.get(name, None)
    if a_sub is None:
        data['format_errors'][name] = 'No submitted answer.'
        submitted[name] = None
        return

    if normalize_to_ascii:
        a_sub = unidecode(a_sub)
        submitted[name] = a_sub

    if a_sub or allow_blank:
        submitted[name] = pl.to_json(a_sub)
    else:
        data['format_errors'][
            name] = 'Invalid format. The submitted answer was left blank.'
        submitted[name] = None
예제 #15
0
def generate_valid_usernames(input_username):
    """Yield candidate usernames derived from ``input_username``.

    Bytes input is decoded as UTF-8. The text is transliterated with
    unidecode, stripped and lower-cased; characters matching
    INVALID_USERNAME_CHARACTERS become underscores; the result is truncated
    to MAX_USERNAME_LENGTH; runs of underscores are collapsed and
    leading/trailing underscores removed. Candidates are the resulting prefix
    plus each suffix from ``_gen_filler_chars``, starting with the number of
    filler characters needed to reach MIN_USERNAME_LENGTH and growing by one
    until the total would exceed MAX_USERNAME_LENGTH.

    Raises:
        UnicodeDecodeError: when bytes input is not valid UTF-8. This is a
            generator, so the error surfaces on the first iteration.
    """
    if isinstance(input_username, bytes):
        try:
            input_username = input_username.decode("utf-8")
        except UnicodeDecodeError as ude:
            # UnicodeDecodeError needs (encoding, object, start, end, reason);
            # building it from a message and two extra values raised TypeError
            # instead of the intended error.
            raise UnicodeDecodeError(
                ude.encoding,
                ude.object,
                ude.start,
                ude.end,
                "Username contains invalid characters: %s" % ude.reason,
            ) from ude

    normalized = unidecode(input_username).strip().lower()
    prefix = re.sub(INVALID_USERNAME_CHARACTERS, "_",
                    normalized)[:MAX_USERNAME_LENGTH]
    prefix = re.sub(r"_{2,}", "_", prefix)

    # After collapsing runs there is at most one underscore at either end.
    prefix = prefix.strip("_")

    num_filler_chars = max(0, MIN_USERNAME_LENGTH - len(prefix))

    while num_filler_chars + len(prefix) <= MAX_USERNAME_LENGTH:
        for suffix in _gen_filler_chars(num_filler_chars):
            yield prefix + suffix
        num_filler_chars += 1
예제 #16
0
    def apply_unidecode(self, to_convert: str):
        """Transliterate ``to_convert`` with unidecode and record the alignment.

        The input is first normalized when ``self.norm_form`` is set. Returns
        a TransductionGraph whose ``output_string`` is the transliteration and
        whose ``edges`` pair each input index with the output indices it
        produced.
        """
        if self.norm_form:
            to_convert = normalize(to_convert, self.norm_form)
        tg = TransductionGraph(to_convert)

        # Conversion is done character by character using unidecode
        pieces = [text_unidecode.unidecode(ch) for ch in to_convert]
        tg.output_string = "".join(pieces)

        # Some inputs get completely deleted by unidecode, in which case there
        # are no valid edges to output.
        alignment = []
        if tg.output_string != "":
            out_pos = 0
            for in_pos, piece in enumerate(pieces):
                if piece:
                    # One edge per output character produced by this input character.
                    for _ in piece:
                        alignment.append((in_pos, out_pos))
                        out_pos += 1
                else:
                    # A deleted character is attached to the previous output position.
                    alignment.append((in_pos, max(out_pos - 1, 0)))
        tg.edges = alignment

        return tg
예제 #17
0
def voc_corpus():
  """Construct the vocabulary based on most frequent opinion words in learning
  set.

  Words are visited from most to least frequent. Each word is transliterated
  with unidecode and looked up in SentiWordNet; words whose synsets satisfy
  ``condition`` are added to ``voc``. The loop stops once ``voc_size`` equals
  ``max_voc_size``.

  Parameters
  ----------

  Returns
  -------
  None
    The vocabulary is constructed.
  """
  from learning_class import learning
  learning()
  from learning_class import tokenized_learning_class
  words = []
  for tokens in tokenized_learning_class.values():
    words += tokens
  freq = nltk.FreqDist(words)
  # Tuple-unpacking in the loop replaces the tuple-parameter lambda, which is
  # a SyntaxError on Python 3.
  for word, _count in freq.most_common():
    word = unidecode(word)
    # The last two characters are cut off for the lookup; with pos_bool the
    # final character is passed as the part of speech
    # (presumably tokens look like "<lemma>_<pos>" -- TODO confirm).
    if pos_bool:
      list_senti_synsets = swn.senti_synsets(word[:-2], word[-1])
    else:
      list_senti_synsets = swn.senti_synsets(word[:-2])
    if list_senti_synsets == []:
      continue
    if condition(list_senti_synsets):
      add_key(voc, word)
    if voc_size == max_voc_size:
      break
예제 #18
0
파일: forms.py 프로젝트: polyatail/saleor
 def save(self, commit=True):
     """Set the slug from the name, attach the parent if any, then save the form."""
     category = self.instance
     category.slug = slugify(unidecode(category.name))
     if self.parent_pk:
         category.parent = get_object_or_404(Category, pk=self.parent_pk)
     super(CategoryForm, self).save(commit=commit)
     return self.instance
예제 #19
0
 def get_absolute_url(self):
     """Return the 'product:brand' URL built from the slugified brand name and the id."""
     brand_slug = slugify(smart_text(unidecode(self.brand_name)))
     url_kwargs = {'path': brand_slug, 'brand_id': self.id}
     return reverse('product:brand', kwargs=url_kwargs)
예제 #20
0
def get_crossword_string(topic):
    """Return ``topic`` as an upper-case string of word characters only.

    The topic is transliterated with unidecode, every '&' becomes 'AND', the
    text is upper-cased and every run of non-word characters is removed.
    Underscores are word characters for ``\\W`` and are therefore kept.
    """
    topic = unidecode(topic).replace('&', 'AND').upper()
    # Raw string: '\W' in a normal literal is an invalid escape sequence.
    return re.sub(r'\W+', '', topic)
예제 #21
0
파일: views.py 프로젝트: shcrossan/iti-sms
def sendSMS(request):
    """Send the submitted message to every listed number, or render the send form.

    With a valid form the message is transliterated with unidecode, its
    apostrophes are replaced by spaces, and it is sent to each
    comma-separated number once per device belonging to the requesting user;
    every send is also stored through ``SaveMsgForm``. The view then
    redirects to '/messages/0'.

    Otherwise the 'sendsms.html' template is rendered. The template context
    is ``locals()``, so every local name below is part of the template
    contract and must not be renamed or removed.
    """
    form = SendMsgForm(request.POST or None)
    form2 = SaveMsgForm()
    gateway = SmsGateway()
    if form.is_valid():
        number = form.cleaned_data['phoneNumber']
        # the phone number field holds a comma-separated list of recipients
        number_list = number.split(",")
        message3 = form.cleaned_data['message']
        message2 = unidecode(message3)
        # apostrophes are replaced by spaces before sending
        message = message2.replace("'", " ")
        deviceID = request.POST.get('deviceID')
        # all devices are loaded and filtered by owner in Python
        device_obj = device.objects.all()
        for d_obj in device_obj:
            if d_obj.user == request.user:
                for num in number_list:
                    accountEmail = d_obj.accountEmail
                    accountPassword = d_obj.accountPassword
                    gateway.loginDetails(accountEmail, accountPassword)
                    gateway.sendMessageToNumber(num, message, deviceID)
                    # NOTE(review): the same unbound form2 is saved for every
                    # number; confirm each call yields a separate record.
                    save_it = form2.save(commit=False)
                    save_it.user = request.user
                    save_it.sentTo = num
                    save_it.msgText = message
                    save_it.save()
        messages.success(request, 'Message Envoye')
        return redirect('/messages/0')
    # the names below look unused but reach the template through locals()
    username = request.user.username
    device_obj = device.objects.all()
    contact_list = contacts.objects.filter(user=request.user).order_by('firstName')
    group_list = contactgroup.objects.filter(contact__user=request.user).distinct().order_by('groupName')
    template_list = msgTemplates.objects.filter(user=request.user).distinct()
    context = {"form": form}
    template = "sendsms.html"
    pg = ['active', '', '']
    return render_to_response(template, locals(), context_instance=RequestContext(request))
예제 #22
0
def norm_names(x):
    """Return an ASCII version of ``x``; float values are returned unchanged.

    Non-float input is transliterated with unidecode, NFKD-normalized, and
    any remaining non-ASCII characters are dropped.
    """
    if not isinstance(x, float):
        transliterated = unidecode.unidecode(x)
        decomposed = unicodedata.normalize('NFKD', transliterated)
        return decomposed.encode('ASCII', 'ignore').decode()
    return x
예제 #23
0
def slugify_text(text):
    """Turn ``text`` into a lower-case slug joined by DEFAULT_SEPARATOR.

    Non-``str`` input is decoded as UTF-8 (errors ignored). Quotes become
    separators, the text is transliterated with unidecode, character
    entities (named, decimal, hex) are expanded, and the result is
    NFKD-normalized and lower-cased. Text matching ALLOWED_CHARS_PATTERN is
    then replaced by the separator, and duplicate and leading/trailing
    separators are removed.
    """
    def named_entity(match):
        return unichr(name2codepoint[match.group(1)])

    def decimal_entity(match):
        return unichr(int(match.group(1)))

    def hex_entity(match):
        return unichr(int(match.group(1), 16))

    if not isinstance(text, str):
        text = str(text, 'utf-8', 'ignore')

    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
    text = unidecode(text)
    text = CHAR_ENTITY_PATTERN.sub(named_entity, text)

    # Numeric entities that cannot be converted are left untouched.
    try:
        text = DECIMAL_PATTERN.sub(decimal_entity, text)
    except Exception:
        pass

    try:
        text = HEX_PATTERN.sub(hex_entity, text)
    except Exception:
        pass

    text = unicodedata.normalize('NFKD', text).lower()
    text = QUOTE_PATTERN.sub('', text)
    text = re.sub(ALLOWED_CHARS_PATTERN, DEFAULT_SEPARATOR, text)
    deduplicated = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text)
    return deduplicated.strip(DEFAULT_SEPARATOR)
예제 #24
0
def _message_to_tag_value(message,
                          allowed_chars=string.ascii_lowercase +
                          string.digits + '_'):
    """
    Turn a long user-facing error message into a short slug that can be used as a datadog tag value

    The message is passed through unidecode and lower-cased, every character
    outside ``allowed_chars`` becomes a space, and the first four
    space-delimited words are joined with underscores. The result is capped
    at 59 characters.

    >>> _message_to_tag_value('Sorry, an error occurred while processing that request.')
    'sorry_an_error_occurred'
    >>> _message_to_tag_value('Another process prevented us from servicing your request. Please try again later.')
    'another_process_prevented_us'
    >>> _message_to_tag_value('509 Unknown Status Code')
    '509_unknown_status_code'
    >>> _message_to_tag_value(
    ... 'EntityScreen EntityScreen [Detail=org.commcare.suite.model.Detail@1f984e3c, '
    ... 'selection=null] could not select case 8854f3583f6f46e69af59fddc9f9428d. '
    ... 'If this error persists please report a bug to CommCareHQ.')
    'entityscreen_entityscreen_detail_org'
    """
    lowered = unidecode(message).lower()
    filtered = ''.join(
        (ch if ch in allowed_chars else ' ') for ch in lowered)
    leading_words = re.split(r' +', filtered)[:4]
    return '_'.join(leading_words)[:59]
예제 #25
0
 def _format_account_name(self, name):
     """Clean up an account name and map it through BC_ACCOUNTS_DICT.

     The four class regexes are applied in order, the result is
     transliterated with unidecode, and the configured replacement for that
     name is returned when one exists (the cleaned name otherwise).
     """
     substitutions = (
         (self.NAME_FORBIDDEN_CHARS_REGEX, '-'),
         (self.DASH_COLON_DASH_REGEX, ':'),
         (self.DASH_EOL_REGEX, ''),
         (self.DASH_DASH_REGEX, '-'),
     )
     for pattern, replacement in substitutions:
         name = pattern.sub(replacement, name)
     cleaned = unidecode(name)
     return config.BC_ACCOUNTS_DICT.get(cleaned, cleaned)
예제 #26
0
def build_form_multimedia_zip(
        domain,
        export_id,
        datespan,
        user_types,
        download_id,
        owner_id,
):
    """Build the multimedia zip for a form export and expose it for download.

    Collects attachment info for the export's forms that have multimedia,
    writes the attachments to a temporary file, and hands that file to
    ``_save_and_expose_zip`` under the name 'multimedia-<export name>'.
    Progress is reported through ``DownloadBase.set_progress`` before and
    after the work.
    """
    from corehq.apps.export.models import FormExportInstance
    export = FormExportInstance.get(export_id)
    form_ids = get_form_ids_having_multimedia(
        domain, export.app_id, export.xmlns, datespan, user_types
    )
    forms_info = _get_form_attachment_info(domain, form_ids, export)

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # Union of the case ids referenced by all forms (empty when there are none).
    if forms_info:
        all_case_ids = set.union(*(info['case_ids'] for info in forms_info))
    else:
        all_case_ids = set()
    case_id_to_name = _get_case_names(domain, all_case_ids)

    with TransientTempfile() as temp_path:
        # The handle itself is unused; the helper is given the path.
        with open(temp_path, 'wb') as f:
            _write_attachments_to_file(temp_path, num_forms, forms_info, case_id_to_name)
        with open(temp_path, 'rb') as zip_file:
            zip_name = 'multimedia-{}'.format(unidecode(export.name))
            _save_and_expose_zip(zip_file, zip_name, domain, download_id, owner_id)

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
예제 #27
0
    def save(self):
        """Create or update a schedule from the validated data.

        An existing row is loaded when ``self.schedule_id`` is set (a
        ValidationError is raised if it is missing); otherwise a new
        ``Schedules`` instance is used. The alias is slugified; for new
        schedules only, a timestamp is appended when the alias is already
        taken. Returns the saved model instance.
        """
        if not self.schedule_id:
            schedule_model = Schedules()
        else:
            try:
                schedule_model = Schedules.objects.get(id=self.schedule_id)
            except Schedules.DoesNotExist:
                raise serializers.ValidationError(
                    {'schedule': ['schedule not found']})

        # Fields missing from the validated data keep the model's current values.
        name = self.validated_data.get('name', schedule_model.name)
        is_active = self.validated_data.get('is_active',
                                            schedule_model.is_active)
        raw_alias = self.validated_data.get("alias", schedule_model.alias)
        alias = slugify(unidecode(raw_alias))

        is_new = not self.schedule_id
        if is_new and Schedules.objects.filter(alias__iexact=alias).exists():
            alias = f"{alias}-{datetime.now().timestamp()}"

        schedule_model.name = name
        schedule_model.alias = alias
        schedule_model.is_active = is_active
        schedule_model.save()
        # Validation runs after the save and failures are only printed.
        try:
            schedule_model.clean()
        except Exception as e:
            print('error')
            print(str(e))
        return schedule_model
예제 #28
0
def toascii(s, translit=False):
    """
    Return an ASCII-only version of the Unicode or byte string `s`.

    With `translit` False the string is decomposed with the Unicode NFKD
    equivalence; with `translit` True it is transliterated with the
    unidecode library.

    Any literal "[?]" in the converted text is replaced by an underscore
    "_", and every character that is still not ASCII is then dropped, so
    the result may be shorter than the input.

    For Unicode NFKD equivalence, see http://en.wikipedia.org/wiki/Unicode_equivalence
    Inspired from: http://code.activestate.com/recipes/251871/#c10 by Aaron Bentley.
    """
    if not isinstance(s, compat.unicode):
        s = as_unicode(s)

    converted = unidecode(s) if translit else unicodedata.normalize('NFKD', s)
    ascii_bytes = converted.replace('[?]', '_').encode('ascii', 'ignore')
    return ascii_bytes.decode('ascii')
예제 #29
0
def getSKUData(driver, prod, queue):
    """Load a product's Amazon page and collect its detail rows and sales rank.

    ``prod`` is a dict read for the keys 'asin', 'image', 'title',
    'price_new' and 'num_reviews'. ``driver`` is used to open the product
    page and look up elements by id (presumably a selenium WebDriver --
    TODO confirm).

    NOTE(review): in the visible code ``data``, ``image``, ``title``,
    ``price``, ``num_reviews`` and ``queue`` are never used and nothing is
    returned; this excerpt looks truncated. ``except Exception, e`` is
    Python 2 syntax.
    """
    data = []
    asin = (prod['asin'])
    image = prod['image']
    title = prod['title']
    # decimal comma -> decimal point
    price = prod['price_new'].replace(',', '.')
    num_reviews = prod['num_reviews']
    url = 'http://www.amazon.com/dp/' + unidecode(asin)

    #driver = webdriver.Firefox()
    driver.get(url)

    # Gather the <tr> rows of every known product-details section; a section
    # that cannot be found is skipped.
    try:
        product_details = driver.find_element_by_id(
            'productDetails_techSpec_section_1').find_elements_by_tag_name(
                'tr')
    except:
        product_details = []
    try:
        product_details += driver.find_element_by_id(
            'productDetails_detailBullets_sections1'
        ).find_elements_by_tag_name('tr')
    except:
        pass
    try:
        product_details += driver.find_element_by_id(
            'productDetails_techSpec_section_2').find_elements_by_tag_name(
                'tr')
    except:
        pass
    try:
        product_details += driver.find_element_by_id(
            'productDetails_feature_div').find_elements_by_tag_name('tr')
    except:
        pass

    salesRank = 'NA'
    # Try to read the sales rank from the page rows; if that raises, fall back
    # to the `amazon` lookup, and to 'NA' if that fails too.
    # Does not work sometimes. Need to be more robust.
    try:
        # NOTE(review): the value found here is stored in salesRankElem and
        # salesRank is not updated from it in the visible code.
        salesRankElem = getElement(product_details,
                                   'Best Sellers Rank').strip()
    except Exception, e:
        try:
            product = amazon.lookup(ItemId=unidecode(asin))
            salesRank = product.sales_rank
        except:
            salesRank = 'NA'
예제 #30
0
    def save(self, commit=True):
        """Set the slug from the name, save, and queue thumbnails for a changed image."""
        self.instance.slug = slugify(unidecode(self.instance.name))
        saved = super().save(commit=commit)

        # Thumbnails are only queued for a stored object whose image changed.
        if not saved.pk or 'background_image' not in self.changed_data:
            return saved

        create_collection_background_image_thumbnails.delay(saved.pk)
        return saved
예제 #31
0
def main(args):
  """
  Usage:
    unidecode [<input_file> [<output_file>]]
  """

  # Read the input, transliterate it with unidecode, write the result.
  destination = args["<output_file>"]
  source_text = read_file(args["<input_file>"])
  write_file(destination, text_unidecode.unidecode(source_text))
예제 #32
0
    def save(self, commit=True):
        """Refresh the slug from the name, save the form, and queue
        background-image thumbnails when that image was changed."""
        name_slug = slugify(unidecode(self.instance.name))
        self.instance.slug = name_slug
        collection = super().save(commit=commit)

        if collection.pk:
            if 'background_image' in self.changed_data:
                create_collection_background_image_thumbnails.delay(collection.pk)

        return collection
예제 #33
0
def clean_table_name(domain, readable_name):
    """
    Build a configurable report table name from a readable name.

    The name is transliterated with unidecode, lower-cased, has each space
    replaced by an underscore, and is truncated to the space left for it.
    """
    underscored = unidecode(readable_name).lower().replace(' ', '_')
    # 63 = max postgres table name, 24 = table name prefix + hash overhead
    available = 63 - len(domain) - 24
    return underscored[:available]
예제 #34
0
파일: line.py 프로젝트: raviqqe/shakyo
 def _normalize_char(cls, char):
   """Expand ``char`` into a list of Characters carrying the same attribute.

   With ``cls._ASCIIZE`` the character is transliterated with unidecode;
   otherwise it is NFC-normalized and every character in a Unicode "Z"
   (separator) category becomes a plain space.
   """
   if cls._ASCIIZE:
     glyphs = text_unidecode.unidecode(str(char))
   else:
     glyphs = (' ' if unicodedata.category(glyph).startswith("Z") else glyph
               for glyph in unicodedata.normalize("NFC", str(char)))
   return [character.Character(glyph, char.attr) for glyph in glyphs]
예제 #35
0
def make_list_better(list_item):
    """Return ``list_item`` passed through unidecode, printing debug output.

    Prints ``list_item[0][0]`` before the conversion, then ``list_item[0]``
    and the converted value after it.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the print statement is a SyntaxError on Python 3.
    print(list_item[0][0])
    better = unidecode(list_item)
    print(list_item[0])
    print(better)
    return better
예제 #36
0
파일: forms.py 프로젝트: arneb/saleor
 def save(self, commit=True):
     """Save the category form, keeping slug, parent and hidden state consistent.

     The slug comes from the name; a category with a hidden parent is hidden
     too; after saving, the submitted ``hidden`` value is passed to
     ``set_hidden_descendants``.
     """
     category = self.instance
     category.slug = slugify(unidecode(category.name))
     if self.parent_pk:
         category.parent = get_object_or_404(Category, pk=self.parent_pk)
     if category.parent and category.parent.hidden:
         category.hidden = True
     super(CategoryForm, self).save(commit=commit)
     hide_descendants = self.cleaned_data['hidden']
     self.instance.set_hidden_descendants(hide_descendants)
     return self.instance
예제 #37
0
    def test_ascii_rank(self):
        """Ranks sort in the same order as the transliterated strings."""
        from text_unidecode import unidecode

        strings = [u"a", u"az", u"aaaa", u"azzz", u"zaaa", u"jazz", u"ball", u"a ball", u"łukąźć", u"ołówek", u"♧"]

        sorted_ranks = sorted(get_ascii_string_rank(s) for s in strings)

        # Ordering the ranks should result in the same order as the strings.
        ascii_in_order = sorted(unidecode(s) for s in strings)
        ranks_of_sorted_strings = [get_ascii_string_rank(s) for s in ascii_in_order]
        self.assertEqual(ranks_of_sorted_strings, sorted_ranks)
예제 #38
0
파일: forms.py 프로젝트: mirumee/saleor
    def save(self, commit=True):
        """Set slug and parent, save the form, and queue thumbnails for a changed image."""
        category = self.instance
        category.slug = slugify(unidecode(category.name))

        if self.parent_pk:
            category.parent = get_object_or_404(Category, pk=self.parent_pk)

        saved = super().save(commit=commit)

        # Thumbnails are only queued for a stored object whose image changed.
        if not saved.pk or "background_image" not in self.changed_data:
            return saved

        create_category_background_image_thumbnails.delay(saved.pk)
        return saved
예제 #39
0
def get_ascii_string_rank(string, max_digits=9):
    """Convert a string into a number such that when the numbers are sorted
    they maintain the lexicographic sort order of the words they represent.

    Every character is mapped to a two-digit code and the codes are
    concatenated, then cut to ``max_digits`` digits. For the default of 9
    that means the first four characters are fully represented and the fifth
    only by the leading digit of its code; anything after that is ignored.

    Unfortunately this basically means:

    >>> get_ascii_string_rank("Python") == get_ascii_string_rank("Pythonic")
    True

    when obviously it'd be better if the rank for "Pythonic" was > than the
    rank for "Python" since "Pythonic" is alphabetically after "Python".

    :param string: the text to rank. It is transliterated with ``unidecode``
        when ``HAS_UNIDECODE`` is true; otherwise a warning is logged and the
        text is used as is.
    :param max_digits: number of decimal digits kept in the returned rank.
    :returns: the rank as an ``int``.
    """
    # Smallest ordinal value we take into account
    smallest_ord = ord(u"A")
    # Ordinal used for every non-alphabetic character. It sits one below "A",
    # so punctuation (and the padding built from it) ranks *before* all
    # letters of the alphabet.
    punctuation_ord = smallest_ord - 1
    # Offset to normalize the actual ord value by. With 11 taken off,
    # punctuation maps to 10 and "A" to 11, so no code starts with a "0" that
    # would be lost when the digits are cast to an int.
    offset = smallest_ord - 11

    def get_ord(c):
        """Return the normalized ordinal of a single character."""
        return (ord(c) if c.isalpha() else punctuation_ord) - offset

    # Padding for the string if it's shorter than `max_digits`
    padding = chr(punctuation_ord) * max_digits

    if HAS_UNIDECODE:
        # And parse it with unidecode to get rid of non-ascii characters
        string = unidecode(string)
    else:
        logging.warning(
            'text_unidecode package not found. If a string with non-ascii chars '
            'is used for a document rank it may result in unexpected ordering'
        )

    # Each character yields at least two digits, so only the first
    # ceil(max_digits / 2) characters can reach the truncated result. Dropping
    # the rest up front avoids converting an arbitrarily long string. The
    # trim must come after transliteration, which can change the length.
    if max_digits > 0:
        string = string[:(max_digits + 1) // 2]

    # Get the ordinals and concat them, making sure they're all at least
    # 2 digits long
    digits = "".join(str(get_ord(c)).zfill(2) for c in (string + padding))
    # Cast back to an int, making sure it's at most `max_digits` long
    return int(digits[:max_digits])
예제 #40
0
파일: text.py 프로젝트: smartstudy/midauth
def slugify(text, delim=u'-'):
    """Build an ASCII-only slug out of a unicode string

    The text is lower-cased and split on ``_punct_re``; every piece is
    transliterated with ``unidecode`` and split on whitespace, and the
    resulting words are joined with ``delim``.

    :param text: text to be translated into a slug
    :type text: unicode
    :param delim: delimiter placed between the words of the slug
    :type delim: unicode
    :returns: the slug
    :rtype: unicode
    :raises TypeError: if ``text`` is not a ``unicode`` instance

    .. seealso:: http://flask.pocoo.org/snippets/5/

    """
    if not isinstance(text, unicode):
        raise TypeError('text should be an unicode, not {0}'.format(text))

    words = []
    for chunk in _punct_re.split(text.lower()):
        # Transliteration may introduce whitespace, so split again afterwards.
        words.extend(unidecode(chunk).split())

    slug = delim.join(words)
    return unicode(slug)
예제 #41
0
    def convert_unicode_punctuation(self, word):
        """Replace characters of ``word`` by allowed ASCII punctuation.

        Each character is transliterated with ``unidecode`` and lower-cased.
        The transliteration replaces the character only when it is non-empty
        and ``punct_word`` accepts it against
        ``ALLOWED_CONVERTED_UNICODE_PUNCTUATION``; otherwise the original
        character is kept.

        :param word: the string to process, character by character.
        :returns: the rebuilt string.
        """
        pieces = []
        for original in word:
            ascii_form = unidecode(original).lower()

            # An empty transliteration means there is nothing reasonable to
            # substitute, so the punctuation check is skipped entirely.
            use_ascii = False
            if ascii_form:
                use_ascii = punct_word(
                    ascii_form,
                    punctuation=ALLOWED_CONVERTED_UNICODE_PUNCTUATION)

            pieces.append(ascii_form if use_ascii else original)
        return ''.join(pieces)
예제 #42
0
파일: forms.py 프로젝트: zdobooto/saleor
 def save(self, commit=True):
     """Set the slug from the transliterated instance name, then save.

     :param commit: forwarded unchanged to the parent ``save``.
     :returns: whatever the parent ``save`` returns.
     """
     ascii_name = unidecode(self.instance.name)
     self.instance.slug = slugify(ascii_name)
     saved = super().save(commit=commit)
     return saved
예제 #43
0
def test_7bit_text_purity():
    """A string holding every 7-bit character must pass through unchanged."""
    seven_bit_chars = [chr(code) for code in range(128)]
    txt = "".join(seven_bit_chars)
    assert unidecode(txt) == txt
예제 #44
0
def test_7bit_purity(code):
    """The single character with ordinal ``code`` must pass through unchanged."""
    original = chr(code)
    transliterated = unidecode(original)
    assert transliterated == original
예제 #45
0
def test_transliterate(text, result):
    """Transliterating ``text`` must give exactly ``result``."""
    transliterated = unidecode(text)
    assert transliterated == result
예제 #46
0
파일: forms.py 프로젝트: artursmet/saleor
 def save(self, commit=True):
     """Slug the category, attach its parent if one was given, then save.

     The slug comes from the ASCII transliteration of the instance name.
     When ``self.parent_pk`` is set, the parent ``Category`` is fetched via
     ``get_object_or_404`` and assigned to the instance.

     :param commit: forwarded unchanged to the parent ``save``.
     :returns: whatever the parent ``save`` returns.
     """
     category = self.instance
     category.slug = slugify(unidecode(category.name))

     if self.parent_pk:
         category.parent = get_object_or_404(Category, pk=self.parent_pk)

     return super().save(commit=commit)
예제 #47
0
 def get_slug(self):
     """Return the slug built from the transliterated ``self.name``."""
     ascii_name = unidecode(self.name)
     as_text = smart_text(ascii_name)
     return slugify(as_text)
예제 #48
0
파일: __init__.py 프로젝트: joke2k/faker
    def _to_ascii(self, string):
        """Apply ``self.replacements`` in order, then transliterate to ASCII.

        :param string: the text to convert.
        :returns: the result of ``unidecode`` on the substituted text.
        """
        for old, new in self.replacements:
            string = string.replace(old, new)

        return unidecode(string)
예제 #49
0
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
            separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
            replacements=()):
    """
    Make a slug from the given text.

    The text goes through a fixed sequence of steps: user replacements,
    transliteration, character-reference expansion, NFKD normalization,
    optional lower-casing, removal of quotes and of NUMBERS_PATTERN matches,
    replacement of unwanted characters by DEFAULT_SEPARATOR, stopword
    removal, user replacements again, optional truncation and finally the
    switch to the requested separator.

    :param text (str): initial text
    :param entities (bool): if True, replace each CHAR_ENTITY_PATTERN match with the character its entity name maps to in name2codepoint
    :param decimal (bool): if True, replace each DECIMAL_PATTERN match with the character whose code point is the captured decimal number
    :param hexadecimal (bool): if True, replace each HEX_PATTERN match with the character whose code point is the captured hexadecimal number
    :param max_length (int): output string length; truncation only happens when this is greater than 0
    :param word_boundary (bool): forwarded to smart_truncate (presumably truncates at whole words - confirm there)
    :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
    :param separator (str): separator between words
    :param stopwords (iterable): words to discount
    :param regex_pattern (str): regex whose matches are replaced by the default separator; overrides the built-in allowed-characters pattern
    :param lowercase (bool): activate case sensitivity by setting it to False
    :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
    :return (str):
    """

    # user-specific replacements
    if replacements:
        for old, new in replacements:
            text = text.replace(old, new)

    # ensure text is unicode
    if not isinstance(text, _unicode_type):
        text = _unicode(text, 'utf-8', 'ignore')

    # replace quotes with dashes - pre-process
    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)

    # decode unicode
    text = unidecode.unidecode(text)

    # ensure text is still in unicode
    if not isinstance(text, _unicode_type):
        text = _unicode(text, 'utf-8', 'ignore')

    # character entity reference
    if entities:
        text = CHAR_ENTITY_PATTERN.sub(lambda m: unichr(name2codepoint[m.group(1)]), text)

    # decimal character reference
    # (best effort: if any reference cannot be converted, text is left as it was)
    if decimal:
        try:
            text = DECIMAL_PATTERN.sub(lambda m: unichr(int(m.group(1))), text)
        except Exception:
            pass

    # hexadecimal character reference
    # (best effort: if any reference cannot be converted, text is left as it was)
    if hexadecimal:
        try:
            text = HEX_PATTERN.sub(lambda m: unichr(int(m.group(1), 16)), text)
        except Exception:
            pass

    # translate
    # (NFKD-normalize; on Python 2 additionally encode to ASCII, dropping
    # whatever cannot be encoded)
    text = unicodedata.normalize('NFKD', text)
    if sys.version_info < (3,):
        text = text.encode('ascii', 'ignore')

    # make the text lowercase (optional)
    if lowercase:
        text = text.lower()

    # remove generated quotes -- post-process
    text = QUOTE_PATTERN.sub('', text)

    # cleanup numbers
    text = NUMBERS_PATTERN.sub('', text)

    # replace all other unwanted characters
    # (a caller-supplied regex_pattern takes precedence over either default)
    if lowercase:
        pattern = regex_pattern or ALLOWED_CHARS_PATTERN
    else:
        pattern = regex_pattern or ALLOWED_CHARS_PATTERN_WITH_UPPERCASE
    text = re.sub(pattern, DEFAULT_SEPARATOR, text)

    # remove redundant
    # (collapse DUPLICATE_DASH_PATTERN matches and trim separators at both ends)
    text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)

    # remove stopwords
    # (text is already lower-cased when lowercase is set, so the stopwords
    # are lower-cased too before comparing)
    if stopwords:
        if lowercase:
            stopwords_lower = [s.lower() for s in stopwords]
            words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
        else:
            words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
        text = DEFAULT_SEPARATOR.join(words)

    # finalize user-specific replacements
    # (the same rules are applied a second time, now to the processed text)
    if replacements:
        for old, new in replacements:
            text = text.replace(old, new)

    # smart truncate if requested
    if max_length > 0:
        text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)

    # all processing above works with DEFAULT_SEPARATOR; switch to the
    # caller's separator only at the very end
    if separator != DEFAULT_SEPARATOR:
        text = text.replace(DEFAULT_SEPARATOR, separator)

    return text