def get_free_text(self, parameters=None):
        """Assemble the free text for this github profile.

        Parameters
        ----------
        parameters : dictionary
        Maps a source name ("commits" or "issues") to how many items
        should be pulled from that source. Unrecognised names are ignored.
        Defaults to 25 commits and 25 issues.

        Returns
        -------
        The concatenated free text of the pulled items after it has been
        passed through anonymise_text and had underscores replaced by spaces.
        """
        if parameters is None:
            parameters = {"commits": 25, "issues": 25}

        # Source name -> attribute holding the matching generator.
        source_attrs = {"commits": "_commits", "issues": "_issues"}

        fragments = []
        for source, count in parameters.items():
            attr_name = source_attrs.get(source)
            if attr_name is None:
                continue
            generator = getattr(self, attr_name)
            if generator is None:
                continue
            for _ in range(count):
                entry = generator_pop(generator)
                # generator_pop may hand back None; such results are skipped.
                if entry is not None:
                    fragments.append(entry.get_free_text())

        combined = "".join(fragments)
        return anonymise_text(combined).replace("_", " ")
Ejemplo n.º 2
0
    def get_free_text(self, parameters=None):
        """Builds the freetext associated with the stack overflow profile

        Parameters
        ----------
        parameters : dictionary
        Options for what free text is wanted. Maps a source name
        ("answered_posts", "asked_posts" or "top_tags") to the number of
        items to pull from that source; unrecognised names are ignored.
        Defaults to 25 answered posts, 25 asked posts and 0 top tags.

        Returns
        -------
        The collected text, one item per line, after it has been passed
        through anonymise_text and had underscores replaced by spaces.
        """
        if parameters is None:
            parameters = {"answered_posts": 25, "asked_posts": 25, "top_tags": 0}

        # Source name -> attribute holding the matching generator.
        source_attrs = {
            "answered_posts": "_answered_posts",
            "asked_posts": "_asked_posts",
            "top_tags": "_top_tags",
        }

        pieces = []
        for key, n in parameters.items():
            attr_name = source_attrs.get(key)
            if attr_name is None:
                continue
            generator = getattr(self, attr_name)
            if generator is None:
                continue

            for _ in range(n):
                item = generator_pop(generator)
                # isinstance (rather than an exact type comparison) so that
                # subclasses of StackOverflowPost / str are not silently
                # dropped. Anything else (e.g. None) is skipped.
                if isinstance(item, StackOverflowPost):
                    pieces.append(item.get_free_text() + '\n')
                elif isinstance(item, str):
                    pieces.append(item + '\n')

        # Join once instead of growing a string inside the loop.
        free_text = anonymise_text("".join(pieces)).replace("_", " ")
        return free_text
def scanProfile(url, session=None):
    """Fetch a profile page, flatten its text and write it out.

    Parameters
    ----------
    url : passed to scraper_methods.get_html to fetch the page
    session : optional, forwarded unchanged to scraper_methods.get_html

    The page text is run through scraper_methods.anonymise_text, has its
    line breaks replaced by spaces, and is handed to writeText together
    with profiles_filename. Nothing is returned.
    """
    page_source = scraper_methods.get_html(url, session)
    page_text = BeautifulSoup(page_source, "lxml").text
    anonymised = scraper_methods.anonymise_text(page_text)
    # Replace every CRLF or LF line break with a single space.
    single_line = re.sub("\\r\\n|\\n", " ", anonymised)
    writeText(single_line, profiles_filename)
Ejemplo n.º 4
0
    def get_free_text(self, training=False):
        """Assemble the free text for this stack overflow post.

        Parameters
        ----------
        training : bool
        When true the text is prefixed with "__label__" markers built from
        the post tags and the result is passed through anonymise_text.
        When false the plain title, post and answers are returned as is.
        """
        fields = {
            "title": self._title,
            "post": self._post,
            "answers": " ".join(self._answers),
        }

        if not training:
            return "{title} {post} {answers}".format(**fields)

        # Training data: one "__label__" marker in front of every tag.
        fields["labels"] = "__label__ " + " __label__ ".join(self._post_tags)
        labelled = "{labels} {title} {post} {answers}".format(**fields)
        return anonymise_text(labelled)
    def get_free_text(self, training=False):
        """Builds the freetext associated with the github issue

        Parameters
        ----------
        training : bool
        Used to indicate if we are obtaining training data(if so we include labels)

        Returns
        -------
        When training is false: "<title> <post>" followed by a newline.
        When training is true: "__label__" markers built from the tokenised
        title, followed by the post and a newline, passed through
        anonymise_text and with underscores replaced by spaces.
        """
        if not training:
            return "{title} {post}\n".format(title=self._title,
                                             post=self._post)

        # Labels are only used for training data, so the title is tokenised
        # here instead of on every call.
        title_tokens = [tag[0] for tag in tokenize_title(self._title)]
        labels_prefix = "__label__ " + " __label__ ".join(title_tokens)

        freetext = "{labels} {post}\n".format(labels=labels_prefix,
                                              post=self._post)
        # NOTE(review): replacing "_" after anonymising also rewrites the
        # underscores of the "__label__" markers - confirm this is intended.
        freetext = anonymise_text(freetext).replace("_", " ")
        return freetext
    def get_free_text(self, training=False):
        """Assemble the free text for this github commit.

        Parameters
        ----------
        training : bool
        When true the code is prefixed with "__label__" markers built from
        the tokenised title and the code tags, and the result is passed
        through anonymise_text with underscores replaced by spaces.
        When false the plain title and code are returned.
        """
        code_text = " ".join(self._code_lines)

        if not training:
            return "{title} {code}\n".format(title=self._title, code=code_text)

        # A set is used so that a label shared by the title and the code
        # tags appears only once.
        labels = set()
        for token in tokenize_title(self._title):
            labels.add(token[0])
        labels.update(self.get_code_tags())

        labels_prefix = "__label__ " + " __label__ ".join(labels)
        labelled = "{labels_prefix} {code}\n".format(
            labels_prefix=labels_prefix, code=code_text)
        return anonymise_text(labelled).replace("_", " ")