예제 #1
0
파일: ownyt.py 프로젝트: kernc/orange3-text
    def retrieve_remaining_records(self):
        """Retrieve the result pages that have not been fetched yet.

        If a retrieval is already in progress, the call only clears the
        ``query_running`` flag so that the running loop stops before its next
        request. Otherwise the remaining pages are requested one at a time,
        the parsed records are appended to ``self.output_corpus`` and the
        corpus is sent to the output.

        Retrieval stops early when the flag is cleared or when a request
        reports an error; records collected up to that point are kept. When
        no page could be parsed at all, the corpus is left untouched.
        """
        self.error(1)
        # If a query is running, stop it.
        if self.query_running:
            self.query_running = False
            return

        if self.nyt_api:
            # The paging arithmetic works in steps of 10 records, capped at
            # 100 steps (matching the "max 1000" shown in the info label).
            num_steps = min(math.ceil(self.all_hits/10), 100)

            # Update buttons.
            self.retrieve_other_button.setText('Stop retrieving')
            self.open_set_api_key_dialog_button.setEnabled(False)
            self.run_query_button.setEnabled(False)

            # Accumulate remaining results in these lists.
            remaining_docs = []
            num_metas = len(self.output_corpus.domain.metas)
            remaining_metas = np.empty((0, num_metas), dtype=object)
            remaining_classes = []
            # Stays None until the first page has been parsed; without this
            # default the corpus update below fails with UnboundLocalError
            # whenever the very first request returns nothing.
            meta_vars = None

            self.query_running = True
            self.progressBarInit()
            try:
                for i in range(int(self.num_retrieved/10), num_steps):
                    # Stop querying if the flag is not set.
                    if not self.query_running:
                        break

                    # Update the progress bar.
                    self.progressBarSet(100.0 * (i/num_steps))

                    res, cached, error = self.nyt_api.execute_query(i)

                    if res:
                        docs, metas, meta_vars, class_values = parse_record_json(
                            res, self.nyt_api.includes_fields)
                        remaining_docs += docs
                        remaining_metas = np.vstack((remaining_metas, np.array(metas)))
                        remaining_classes += class_values

                        # Update the info label.
                        self.num_retrieved += len(res["response"]["docs"])
                        info_label = "Records: {}\nRetrieved: {}".format(self.all_hits, self.num_retrieved)
                        if self.all_hits > 1000:
                            info_label += " (max 1000)"
                        self.query_info_label.setText(info_label)

                        if not cached:  # Only wait if an actual request was made.
                            sleep(1)
                    else:
                        if error:
                            if isinstance(error, HTTPError):
                                self.error(1, "An error occurred(HTTP {})".format(error.code))
                            elif isinstance(error, URLError):
                                self.error(1, "An error occurred(URL {})".format(error.reason))
                            break
            finally:
                # Reset the running state even if parsing raised, so the next
                # click starts a retrieval instead of "stopping" a dead one.
                self.progressBarFinished()
                self.query_running = False

            # Update the corpus, but only if at least one page was parsed.
            if meta_vars is not None:
                self.output_corpus.extend_corpus(remaining_docs, remaining_metas, remaining_classes, meta_vars)
                self.send(Output.CORPUS, self.output_corpus)

            if self.num_retrieved == min(self.all_hits, 1000):
                self.retrieve_other_button.setText('All available records retrieved')
                self.retrieve_other_button.setEnabled(False)
            else:
                self.retrieve_other_button.setText('Retrieve remaining records ({})'
                                                   .format(min(self.all_hits, 1000)-self.num_retrieved))
                self.retrieve_other_button.setFocus()

            self.open_set_api_key_dialog_button.setEnabled(True)
            self.run_query_button.setEnabled(True)
예제 #2
0
    def retrieve_remaining_records(self):
        """Retrieve the result pages that have not been fetched yet.

        If a retrieval is already in progress, the call only clears the
        ``query_running`` flag so that the running loop stops before its next
        request. Otherwise the remaining pages are requested one at a time,
        the parsed records are appended to ``self.output_corpus`` and the
        corpus is sent to the output.

        Retrieval stops early when the flag is cleared or when a request
        reports an error; records collected up to that point are kept. When
        no page could be parsed at all, the corpus is left untouched.
        """
        self.error(1)
        # If a query is running, stop it.
        if self.query_running:
            self.query_running = False
            return

        if self.nyt_api:
            # The paging arithmetic works in steps of 10 records, capped at
            # 100 steps (matching the "max 1000" shown in the info label).
            num_steps = min(math.ceil(self.all_hits / 10), 100)

            # Update buttons.
            self.retrieve_other_button.setText('Stop retrieving')
            self.open_set_api_key_dialog_button.setEnabled(False)
            self.run_query_button.setEnabled(False)

            # Accumulate remaining results in these lists.
            remaining_docs = []
            num_metas = len(self.output_corpus.domain.metas)
            remaining_metas = np.empty((0, num_metas), dtype=object)
            remaining_classes = []
            # Stays None until the first page has been parsed; without this
            # default the corpus update below fails with UnboundLocalError
            # whenever the very first request returns nothing.
            meta_vars = None

            self.query_running = True
            self.progressBarInit()
            try:
                for i in range(int(self.num_retrieved / 10), num_steps):
                    # Stop querying if the flag is not set.
                    if not self.query_running:
                        break

                    # Update the progress bar.
                    self.progressBarSet(100.0 * (i / num_steps))

                    res, cached, error = self.nyt_api.execute_query(i)

                    if res:
                        docs, metas, meta_vars, class_values = \
                            parse_record_json(
                                res, self.nyt_api.includes_fields)
                        remaining_docs += docs
                        remaining_metas = np.vstack(
                            (remaining_metas, np.array(metas)))
                        remaining_classes += class_values

                        # Update the info label.
                        self.num_retrieved += len(res["response"]["docs"])
                        info_label = "Records: {}\nRetrieved: {}".format(
                            self.all_hits, self.num_retrieved)
                        if self.all_hits > 1000:
                            info_label += " (max 1000)"
                        self.query_info_label.setText(info_label)

                        # Only wait if an actual request was made.
                        if not cached:
                            sleep(1)
                    else:
                        if error:
                            if isinstance(error, HTTPError):
                                self.error(
                                    1, "An error occurred(HTTP {})".format(
                                        error.code))
                            elif isinstance(error, URLError):
                                self.error(
                                    1, "An error occurred(URL {})".format(
                                        error.reason))
                            break
            finally:
                # Reset the running state even if parsing raised, so the next
                # click starts a retrieval instead of "stopping" a dead one.
                self.progressBarFinished()
                self.query_running = False

            # Update the corpus, but only if at least one page was parsed.
            if meta_vars is not None:
                self.output_corpus.extend_corpus(
                    remaining_docs, remaining_metas, remaining_classes,
                    meta_vars)
                self.send(Output.CORPUS, self.output_corpus)

            if self.num_retrieved == min(self.all_hits, 1000):
                self.retrieve_other_button.setText(
                    'All available records retrieved')
                self.retrieve_other_button.setEnabled(False)
            else:
                self.retrieve_other_button.setText(
                    'Retrieve remaining records ({})'.format(
                        min(self.all_hits, 1000) - self.num_retrieved))
                self.retrieve_other_button.setFocus()

            self.open_set_api_key_dialog_button.setEnabled(True)
            self.run_query_button.setEnabled(True)
예제 #3
0
파일: ownyt.py 프로젝트: kernc/orange3-text
    def run_initial_query(self):
        """Run the first page of a new query and output the resulting corpus.

        Clears any previous warning and error, points the API object at the
        current keywords, year range and selected text fields, and executes
        page 0. On success a corpus is built from the parsed records and sent
        to the output, and the info label, the 'retrieve remaining' button and
        the query history are updated. On failure an HTTP or URL error message
        is shown instead.
        """
        self.warning(1)
        self.error(1)

        # Nothing can be queried without the NYT object (safety lock).
        if not self.nyt_api:
            return

        keywords = self.query_combo.currentText()

        # Flags telling which text fields take part in the query.
        field_flags = [
            self.includes_headline,
            self.includes_lead_paragraph,
            self.includes_snippet,
            self.includes_abstract,
            self.includes_keywords,
        ]
        if True not in field_flags:
            self.warning(1, "You must select at least one text field.")
            return

        self.nyt_api.set_query_url(keywords, self.year_from, self.year_to, field_flags)
        response, _, failure = self.nyt_api.execute_query(0)

        if not response:
            # Report the failure that came back with the empty result, if any.
            if failure:
                if isinstance(failure, HTTPError):
                    self.error(1, "An error occurred(HTTP {})".format(failure.code))
                elif isinstance(failure, URLError):
                    self.error(1, "An error occurred(URL {})".format(failure.reason))
            return

        # Build the output corpus; the section name serves as the class.
        documents, metas, meta_vars, labels = parse_record_json(response, field_flags)
        section_var = DiscreteVariable("section_name", values=list(set(labels)))
        Y = np.array([section_var.to_val(label) for label in labels])[:, None]
        Y[np.isnan(Y)] = 0
        domain = Domain([], class_vars=[section_var], metas=meta_vars)

        self.output_corpus = Corpus(documents, None, Y, metas, domain)
        self.send(Output.CORPUS, self.output_corpus)

        # Remember and display how much of the result set has been fetched.
        payload = response["response"]
        self.all_hits = payload["meta"]["hits"]
        self.num_retrieved = len(payload["docs"])
        summary = "Records: {}\nRetrieved: {}".format(self.all_hits, self.num_retrieved)
        if self.all_hits > 1000:
            summary += " (max 1000)"
        self.query_info_label.setText(summary)

        # Offer the remaining records only when there are some left to fetch.
        left_to_fetch = min(self.all_hits, 1000) - self.num_retrieved
        if left_to_fetch > 0:
            self.retrieve_other_button.setText(
                'Retrieve remaining records ({})'.format(left_to_fetch))
            self.retrieve_other_button.setEnabled(True)
            self.retrieve_other_button.setFocus()
        else:
            self.retrieve_other_button.setText('All records retrieved')
            self.retrieve_other_button.setEnabled(False)

        # Keep new keywords at the front of the query history.
        if keywords not in self.recent_queries:
            self.recent_queries.insert(0, keywords)
예제 #4
0
    def run_initial_query(self):
        """Run the first page of a new query and output the resulting corpus.

        Clears any previous warning and error, points the API object at the
        current keywords, year range and selected text fields, and executes
        page 0. On success a corpus is built from the parsed records and sent
        to the output, and the info label, the 'retrieve remaining' button and
        the query history are updated. On failure an HTTP or URL error message
        is shown instead.
        """
        self.warning(1)
        self.error(1)

        # Nothing can be queried without the NYT object (safety lock).
        if not self.nyt_api:
            return

        keywords = self.query_combo.currentText()

        # Flags telling which text fields take part in the query.
        field_flags = [
            self.includes_headline,
            self.includes_lead_paragraph,
            self.includes_snippet,
            self.includes_abstract,
            self.includes_keywords,
        ]
        if True not in field_flags:
            self.warning(1, "You must select at least one text field.")
            return

        self.nyt_api.set_query_url(
            keywords, self.year_from, self.year_to, field_flags)
        response, _, failure = self.nyt_api.execute_query(0)

        if not response:
            # Report the failure that came back with the empty result, if any.
            if failure:
                if isinstance(failure, HTTPError):
                    message = "An error occurred(HTTP {})".format(
                        failure.code)
                    self.error(1, message)
                elif isinstance(failure, URLError):
                    message = "An error occurred(URL {})".format(
                        failure.reason)
                    self.error(1, message)
            return

        # Build the output corpus; the section name serves as the class.
        documents, metas, meta_vars, labels = parse_record_json(
            response, field_flags)
        section_var = DiscreteVariable(
            "section_name", values=list(set(labels)))
        Y = np.array(
            [section_var.to_val(label) for label in labels])[:, None]
        Y[np.isnan(Y)] = 0
        domain = Domain([], class_vars=[section_var], metas=meta_vars)

        self.output_corpus = Corpus(documents, None, Y, metas, domain)
        self.send(Output.CORPUS, self.output_corpus)

        # Remember and display how much of the result set has been fetched.
        payload = response["response"]
        self.all_hits = payload["meta"]["hits"]
        self.num_retrieved = len(payload["docs"])
        summary = "Records: {}\nRetrieved: {}".format(
            self.all_hits, self.num_retrieved)
        if self.all_hits > 1000:
            summary += " (max 1000)"
        self.query_info_label.setText(summary)

        # Offer the remaining records only when there are some left to fetch.
        left_to_fetch = min(self.all_hits, 1000) - self.num_retrieved
        if left_to_fetch > 0:
            self.retrieve_other_button.setText(
                'Retrieve remaining records ({})'.format(left_to_fetch))
            self.retrieve_other_button.setEnabled(True)
            self.retrieve_other_button.setFocus()
        else:
            self.retrieve_other_button.setText('All records retrieved')
            self.retrieve_other_button.setEnabled(False)

        # Keep new keywords at the front of the query history.
        if keywords not in self.recent_queries:
            self.recent_queries.insert(0, keywords)