def downloadEverything(downloader, tickerType, insecure, sleeptime, pandantic):
    loop = 0
    while not downloader.isDone():
        symbols = downloader.nextRequest(insecure, pandantic)
        print("Got " + str(len(symbols)) + " downloaded " + downloader.type + " symbols:")
        if len(symbols) > 2:
            try:
                print(" " + text(symbols[0]))
                print(" " + text(symbols[1]))
                print(" etc...")
            except:
                print(" Could not display some ticker symbols due to char encoding")
        downloader.printProgress()

        # Save the download state occasionally, in case this long-running
        # process is suddenly interrupted.
        loop = loop + 1
        if loop % 200 == 0:
            print("Saving downloader to disk...")
            saveDownloader(downloader, tickerType)
            print("Downloader successfully saved.")
            print("")

        if not downloader.isDone():
            sleep(sleeptime)  # So we don't overload the server.

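# saveDownloader/loadDownloader are called throughout but not defined in this
# section. A minimal sketch, assuming the ".pickle file" messages in main()
# describe a plain pickle round-trip keyed on the ticker type; the real
# helpers may differ.
import pickle

def saveDownloader(downloader, tickerType):
    # Persist the whole downloader (queries, offsets, collected symbols)
    # so an interrupted run can resume where it left off.
    with open(tickerType + '.pickle', 'wb') as f:
        pickle.dump(downloader, f, pickle.HIGHEST_PROTOCOL)

def loadDownloader(tickerType):
    # Raises (e.g. FileNotFoundError) when no previous session exists,
    # which main() catches in order to start a new session.
    with open(tickerType + '.pickle', 'rb') as f:
        return pickle.load(f)
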
def _fetch(self, insecure, market):
    params = {
        'm': market,
        'b': text(self.current_q_item_offset),
        's': self.current_q,
        't': self.type[0].upper(),
        'p': 1,
    }
    query_string = {
        'device': 'console',
        'returnMeta': 'true',
    }
    protocol = 'http' if insecure else 'https'
    req = requests.Request('GET',
                           protocol + '://finance.yahoo.com/_finance_doubledown/api/resource/finance.yfinlist.symbol_lookup' + self._encodeParams(params),
                           headers={'User-agent': user_agent},
                           params=query_string)
    req = req.prepare()
    print("req " + req.url)
    resp = self.rsession.send(req, timeout=(12, 12))
    resp.raise_for_status()
    if self.current_q_item_offset > 2000:
        # Y! stops returning symbols at offset > 2000.
        # Workaround: add a finer-grained search query.
        self._add_queries(self.current_q)
    return resp.json()

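# For illustration: the URL _fetch builds combines matrix-style parameters
# (";key=value" appended to the path by _encodeParams) with an ordinary query
# string. A standalone sketch of the same composition; the parameter values
# here are examples, not taken from a real run.
from urllib.parse import quote, urlencode

params = {'m': 'all', 'b': '0', 's': 'A', 't': 'S', 'p': 1}
matrix = ''.join(';' + quote(k) + '=' + quote(str(v)) for k, v in params.items())
base = 'https://finance.yahoo.com/_finance_doubledown/api/resource/finance.yfinlist.symbol_lookup'
url = base + matrix + '?' + urlencode({'device': 'console', 'returnMeta': 'true'})
print(url)
# https://finance.yahoo.com/_finance_doubledown/api/resource/finance.yfinlist.symbol_lookup;m=all;b=0;s=A;t=S;p=1?device=console&returnMeta=true
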
def _fetch_processor(self):
    while True:
        (current_query, json, msg) = self.fetch_returns.get()
        (symbols, count) = self.decodeSymbolsContainer(json)

        for symbol in symbols:
            self.symbols[symbol.ticker] = symbol
            # Record the symbols returned for this query.
            current_query.results.append(symbol.ticker)

        if count > 10:
            # This should never happen with this API; it always returns at most 10 items.
            raise Exception("Funny things are happening: count " + text(count) + " > 10. Content:\n" + repr(json))

        # There is no pagination with this API.
        # If we receive X results, we assume there are more than X and
        # add another layer of queries to narrow the search further.
        # In the past, X was known to be 10. Now it is some number 1 < X <= 10.
        if self.result_count_action[count] is None:
            # The action for this number of results is unknown,
            # so assume search narrowing is required.
            self._add_queries(current_query, general_search_characters)
        elif self.result_count_action[count]:
            # This number of results is known to require search narrowing.
            self._add_queries(current_query, general_search_characters)
        else:
            # Tell the query it's done.
            current_query.done()

        print(msg)
        self.status_print(symbols)
        self.completed_queries.append(current_query)
        self.fetch_returns.task_done()

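# result_count_action is used above as a lookup table from "number of results
# returned" to "does this query need narrowing?". Its initialization is not
# shown in this section; the following is a plausible sketch (an assumption,
# not the actual code): None = behaviour unknown, True = narrow further,
# False = query is done.
result_count_action = [None] * 11   # index = result count, 0..10
result_count_action[0] = False      # no results: nothing to narrow
result_count_action[10] = True      # a full page: assume more exist, narrow
# Counts 1..9 stay None until observed behaviour tells us which way to go.
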
def exportFile(data, downloader, file_format):
    # Defer the export calls with lambdas so only the requested format is
    # rendered, and so a failure is caught by the try/except below.
    exporting_function = {
        'xlsx': lambda: data.xlsx,
        'json': lambda: data.json.encode('UTF-8'),
        'yaml': lambda: data.yaml.encode('UTF-8'),
    }
    if file_format == 'csv':
        with io.open(downloader.type + '.csv', 'w', encoding='utf-8') as f:
            f.write(text.join(u',', data.headers) + '\n')
            writer = csv.writer(f)
            for i in range(0, len(data)):
                row = [text(y) if y is not None else u"" for y in data[i]]
                writer.writerow(row)
    elif file_format == 'sqlite':
        db = sqlite3.connect(f'{downloader.type}.{file_format}')
        df = data.export('df')
        df.to_sql('YAHOO_TICKERS', db, if_exists='replace')
        db.commit()
        db.close()
    elif file_format in [item for item in formats if item != 'csv']:
        try:
            with open(f'{downloader.type}.{file_format}', 'wb') as f:
                f.write(exporting_function[file_format]())
        except:
            logger.warning(f"Could not export .{file_format} due to an internal error")
    else:
        logger.error('Unknown output format')

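# A minimal usage sketch for exportFile, assuming a tablib.Dataset (which
# provides the .xlsx/.json/.yaml export properties used above), plus the
# module globals it relies on: a `formats` list and a `logger`. The
# downloader only needs a .type attribute here; _Downloader is a
# hypothetical stand-in for this example.
import logging
import tablib

formats = ['csv', 'xlsx', 'json', 'yaml', 'sqlite']
logger = logging.getLogger(__name__)

class _Downloader:
    type = 'stocks'

data = tablib.Dataset()
data.headers = ['Ticker', 'Name', 'Exchange']
data.append(['AAPL', 'Apple Inc.', 'NMS'])

exportFile(data, _Downloader(), 'json')   # writes stocks.json
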
def nextRequest(self, insecure=False, pandantic=False):
    if not self.IsFirstRequest:
        self._nextQuery()
    else:
        self.IsFirstRequest = False

    success = False
    retryCount = 0
    json = None
    # Exponential back-off algorithm:
    # attempt up to 10 more times, sleeping 2, 4, 8, 16, 32, 64, 128, 256,
    # 512 and 1024 seconds respectively.
    maxRetries = 10
    while not success:
        try:
            json = self._fetch(insecure)
            success = True
        except (requests.HTTPError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.ConnectionError) as ex:
            if retryCount < maxRetries:
                attempt = retryCount + 1
                sleepAmt = int(math.pow(2, attempt))
                print("Retry attempt: " + str(attempt) + " of " + str(maxRetries) + "."
                      " Sleep period: " + str(sleepAmt) + " seconds.")
                sleep(sleepAmt)
                retryCount = attempt
            else:
                raise

    (symbols, count) = self.decodeSymbolsContainer(json)

    for symbol in symbols:
        self.symbols[symbol.ticker] = symbol

    # There is no pagination with this API.
    # If we receive 10 results, we assume there are more than 10 and add
    # another layer of queries to narrow the search further.
    if count == 10:
        self._add_queries(self.current_q)
    elif count > 10:
        # This should never happen with this API; it always returns at most 10 items.
        raise Exception("Funny things are happening: count " + text(count) + " > 10. "
                        + "Content:" + "\n" + repr(json))

    if self._getQueryIndex() + 1 >= len(self.queries):
        self.done = True
    else:
        self.done = False

    return symbols

def nextRequest(self, pbar, insecure=False, pandantic=False):
    self._nextQuery()
    success = False
    retryCount = 0
    json = None
    # Exponential back-off algorithm:
    # attempt up to 5 more times, sleeping x, x^2, x^3, x^4 and x^5 seconds
    # respectively.
    maxRetries = 5
    firstSleep = 5  # seconds
    while not success:
        try:
            json = self._fetch(insecure)
            success = True
        except (requests.HTTPError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.ConnectionError) as ex:
            if retryCount < maxRetries:
                retryCount += 1
                sleepAmt = int(math.pow(firstSleep, retryCount))
                pbar.write("Retry attempt: " + str(retryCount) + " of " + str(maxRetries) + "."
                           " Sleep period: " + str(sleepAmt) + " seconds.")
                sleep(sleepAmt)
                # Recreate the session after sleeping.
                self.rsession = requests.Session()
            else:
                raise ex

    (symbols, count) = self.decodeSymbolsContainer(json)

    for symbol in symbols:
        self.symbols[symbol.ticker] = symbol

    # There is no pagination with this API.
    # If we receive 10 results, add another layer of queries by expanding
    # the query to narrow the search further.
    if count == 10:
        self._add_queries(self.queries[self.idx])
    elif count > 10:
        # This should never happen with this API; it always returns at most 10 items.
        raise Exception("Funny things are happening: count " + text(count) + " > 10. "
                        + "Content:" + "\n" + repr(json))

    if self.idx + 1 >= len(self.queries):
        self.done = True
    else:
        self.done = False

    return symbols

def nextRequest(self, insecure=False, pandantic=False):
    self._nextQuery()
    success = False
    retryCount = 0
    json = None
    # Exponential back-off algorithm:
    # attempt up to 5 more times, sleeping 5, 25, 125, 625 and 3125 seconds
    # respectively.
    maxRetries = 5
    while not success:
        try:
            json = self._fetch(insecure)
            success = True
        except (requests.HTTPError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.ConnectionError) as ex:
            if retryCount < maxRetries:
                attempt = retryCount + 1
                sleepAmt = int(math.pow(5, attempt))
                print("Retry attempt: " + str(attempt) + " of " + str(maxRetries) + "."
                      " Sleep period: " + str(sleepAmt) + " seconds.")
                sleep(sleepAmt)
                retryCount = attempt
            else:
                raise

    (symbols, count) = self.decodeSymbolsContainer(json)

    for symbol in symbols:
        self.symbols[symbol.ticker] = symbol

    # There is no pagination with this API.
    # If we receive 10 results, we assume there are more than 10 and add
    # another layer of queries to narrow the search further.
    # The original test was on == 10, but Yahoo sometimes stops earlier,
    # so it was replaced with > 7 (tunable).
    if 7 < count <= 10:
        self._add_queries(self.current_q)
    elif count > 10:
        # This should never happen with this API; it always returns at most 10 items.
        raise Exception("Funny things are happening: count " + text(count) + " > 10. "
                        + "Content:" + "\n" + repr(json))

    # Test if queries[0] has been searched before signalling done.
    if self._getQueryIndex() + 1 >= len(self.queries):
        self.done = True
    else:
        self.done = False

    return symbols

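# The three nextRequest variants above all inline the same retry loop, only
# with different bases and limits. A sketch of how that pattern could be
# factored out; this helper is not part of the original code, and the
# semantics (initial try plus up to max_retries retries, sleeping
# base**retry_count between attempts) mirror the loops above.
import math
from time import sleep

import requests

RETRIABLE = (requests.HTTPError,
             requests.exceptions.ChunkedEncodingError,
             requests.exceptions.ReadTimeout,
             requests.exceptions.ConnectionError)

def with_retries(fetch, max_retries=5, base=5):
    retry_count = 0
    while True:
        try:
            return fetch()
        except RETRIABLE:
            if retry_count >= max_retries:
                raise
            retry_count += 1
            sleep_amt = int(math.pow(base, retry_count))
            print("Retry attempt: %d of %d. Sleep period: %d seconds."
                  % (retry_count, max_retries, sleep_amt))
            sleep(sleep_amt)

# e.g. json = with_retries(lambda: self._fetch(insecure), max_retries=10, base=2)
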
def main():
    downloader = None

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--insecure", help="use HTTP instead of HTTPS", action="store_true")
    parser.add_argument("-e", "--export", help="export immediately without downloading (only useful if you already downloaded something to the .pickle file)", action="store_true")
    parser.add_argument('-E', '--Exchange', help='Only export ticker symbols from this exchange (the filtering is done during the export phase)')
    parser.add_argument('type', nargs='?', default='tiger', help='The type to download, this can be: ' + " ".join(list(options.keys())))
    parser.add_argument("-s", "--sleep", help="The time to sleep in seconds between requests", type=float, default=0)
    parser.add_argument("-p", "--pandantic", help="Stop and warn the user if some rare assertion fails", action="store_true")
    args = parser.parse_args()

    protocol = 'http' if args.insecure else 'https'

    if args.insecure:
        print("Using insecure connection")

    if args.export:
        print("Exporting pickle file")

    tickerType = args.type = args.type.lower()

    print("Checking if we can resume an old download session")
    try:
        downloader = loadDownloader(tickerType)
        print("Downloader found on disk, resuming")
    except:
        print("No old downloader found on disk")
        print("Starting a new session")
        if tickerType not in options:
            print("Error: " + tickerType + " is not a valid type option. See --help")
            exit(1)
        else:
            downloader = options[tickerType]

    rp = robotparser.RobotFileParser()
    rp.set_url(protocol + '://finance.yahoo.com/robots.txt')
    rp.read()
    try:
        if not args.export:
            if not rp.can_fetch(user_agent, protocol + '://finance.yahoo.com/_finance_doubledown/api/resource/searchassist'):
                print('Execution of script halted due to robots.txt')
                return 1

            if not downloader.isDone():
                print("Downloading " + downloader.type)
                print("")
                downloadEverything(downloader, tickerType, args.insecure, args.sleep, args.pandantic)
                print("Saving downloader to disk...")
                saveDownloader(downloader, tickerType)
                print("Downloader successfully saved.")
                print("")
            else:
                print("The downloader has already finished downloading everything")
                print("")

    except Exception as ex:
        print("An exception occurred while downloading. Suspending downloader to disk")
        saveDownloader(downloader, tickerType)
        print("Successfully saved download state")
        print("Try removing {type}.pickle file if this error persists")
        print("Issues can be reported on https://github.com/Benny-/Yahoo-ticker-symbol-downloader/issues")
        print("")
        raise
    except KeyboardInterrupt as ex:
        print("Suspending downloader to disk as .pickle file")
        saveDownloader(downloader, tickerType)

    if downloader.isDone() or args.export:
        print("Exporting " + downloader.type + " symbols")

        data = tablib.Dataset()
        data.headers = downloader.getRowHeader()

        for symbol in downloader.getCollectedSymbols():
            if args.Exchange is None:
                data.append(symbol.getRow())
            elif symbol.exchange == args.Exchange:
                data.append(symbol.getRow())

        with io.open(downloader.type + '.csv', 'w', encoding='utf-8') as f:
            f.write(text.join(u',', data.headers) + '\n')
            writer = csv.writer(f)
            for i in range(0, len(data)):
                row = [text(y) if y is not None else u"" for y in data[i]]
                writer.writerow(row)

        try:
            with open(downloader.type + '.xlsx', 'wb') as f:
                f.write(data.xlsx)
        except:
            print("Could not export .xlsx due to an internal error")

        try:
            with open(downloader.type + '.json', 'wb') as f:
                f.write(data.json.encode('UTF-8'))
        except:
            print("Could not export .json due to an internal error")

        try:
            with open(downloader.type + '.yaml', 'wb') as f:
                f.write(data.yaml.encode('UTF-8'))
        except:
            print("Could not export .yaml due to an internal error")

def _encodeParams(self, params):
    encoded = ''
    for key, value in params.items():
        encoded += ';' + quote(key) + '=' + quote(text(value))
    return encoded

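# A standalone copy of the encoding above for a quick sanity check, assuming
# quote comes from urllib.parse and text is the str/unicode alias used
# elsewhere in this module:
from urllib.parse import quote

def encode_params(params):
    # Matrix-style parameters: ";key=value" pairs appended to the URL path.
    return ''.join(';' + quote(key) + '=' + quote(str(value))
                   for key, value in params.items())

assert encode_params({'s': 'A&B', 'p': 1}) == ';s=A%26B;p=1'
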
def nextRequest(self, insecure=False, pandantic=False, market='all'):
    # You would expect query_done to be a boolean.
    # But unfortunately we can't depend on Yahoo telling us if there
    # are any more entries. Only if Yahoo tells us x amount of times in
    # succession that they are done will we actually go on to the next query.
    if self.query_done >= self.query_done_max:
        self._nextQuery()

    success = False
    retryCount = 0
    json = None
    # Exponential back-off algorithm:
    # attempt up to 3 more times, sleeping 5, 25 and 125 seconds respectively.
    while not success:
        try:
            json = self._fetch(insecure, market)
            success = True
        except (requests.HTTPError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.ConnectionError) as ex:
            if retryCount < 3:
                attempt = retryCount + 1
                sleepAmt = int(math.pow(5, attempt))
                print("Retry attempt: " + str(attempt) + "."
                      " Sleep period: " + str(sleepAmt) + " seconds.")
                sleep(sleepAmt)
                retryCount = attempt
            else:
                raise

    (symbols, count) = self.decodeSymbolsContainer(json)

    for symbol in symbols:
        self.symbols[symbol.ticker] = symbol

    current_q_item_offset = self.current_q_item_offset + len(symbols)
    current_q_total_items = count

    if current_q_item_offset == current_q_total_items:
        self.query_done += 1
    elif current_q_item_offset > current_q_total_items and pandantic:
        # This should never happen now that we are using the JSON API.
        raise Exception("Funny things are happening: current_q_item_offset "
                        + text(current_q_item_offset) + " > "
                        + text(self.current_q_total_items)
                        + " current_q_total_items. Content:" + "\n" + repr(json))
    else:
        self.query_done = 0

    self.current_q_item_offset = current_q_item_offset
    self.current_q_total_items = current_q_total_items

    if len(symbols) == 0:
        self.current_page_retries += 1
        # Related to issue #4, see
        # https://github.com/Benny-/Yahoo-ticker-symbol-downloader/issues/4#issuecomment-51718922
        # Yahoo sometimes gives a "bad" page. There is no way we can determine
        # if we are at the end of pagination or if we happen to get a bad page
        # a few times in a row. So we simply request the page a lot of times.
        # At some point we are fairly certain we are at the end of pagination.
        if self.current_page_retries > 20:
            self.query_done = self.query_done + self.query_done_max
    else:
        self.current_page_retries = 0

    if self.query_done >= self.query_done_max:
        if self._getQueryIndex() + 1 >= len(self.queries):
            self.done = True
        else:
            self.done = False

    return symbols

def nextRequest(self, insecure=False, pandantic=False):
    # You would expect query_done to be a boolean.
    # But unfortunately we can't depend on Yahoo telling us if there
    # are any more entries. Only if Yahoo tells us x amount of times in
    # succession that they are done will we actually go on to the next query.
    if self.query_done >= self.query_done_max:
        self._nextQuery()

    success = False
    retryCount = 0
    html = ""
    # _fetchHtml may raise an exception based on the response status or
    # if the request caused a transport error.
    # At this point we try a simple exponential back-off algorithm:
    # attempt up to 3 more times, sleeping 5, 25 and 125 seconds respectively.
    while not success:
        try:
            html = self._fetchHtml(insecure)
            success = True
        except (requests.HTTPError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.ConnectionError) as ex:
            if retryCount < 3:
                attempt = retryCount + 1
                sleepAmt = int(math.pow(5, attempt))
                print("Retry attempt: " + str(attempt) + "."
                      " Sleep period: " + str(sleepAmt) + " seconds.")
                sleep(sleepAmt)
                retryCount = attempt
            else:
                raise

    soup = BeautifulSoup(html, "html.parser")

    symbols = None
    try:
        # An exception is thrown here for the following reasons:
        # 1. Yahoo does not include a table (or any results!) if you
        #    request items at offset 2020 or more.
        # 2. Yahoo randomly screws an http request up and the table is
        #    missing (a bad page). A successive http request might not
        #    result in an exception here.
        # 3. A TypeError is raised. This is disabled for now.
        #    TypeError should be thrown in the different downloaders like
        #    MutualFundDownloader.py. It should be a sanity check to make
        #    sure we download the correct type. But for some reason Yahoo
        #    consistently gives back some incorrect types. Search for
        #    Mutual Fund and 1 out of the 20 are ETF's. I am not sure what
        #    is going on. At the moment the sanity checks have been disabled
        #    in the different downloaders.
        symbolsContainer = soup.find("table", {"class": "yui-dt"}).tbody
        symbols = self.decodeSymbolsContainer(symbolsContainer)
    except KeyboardInterrupt as ex:
        raise
    except TypeError as ex:
        raise
    except:
        symbols = []

    for symbol in symbols:
        self.symbols[symbol.ticker] = symbol

    current_q_item_offset = self.current_q_item_offset + len(symbols)
    current_q_total_items = self._getTotalItemsFromSoup(soup)

    if current_q_total_items != 'Unknown':
        if current_q_item_offset == current_q_total_items:
            self.query_done = self.query_done + 1
        elif current_q_item_offset > current_q_total_items and pandantic:
            # This happens rarely for multiple requests to the same url.
            # Output is guaranteed to be inconsistent between runs.
            raise Exception("Funny things are happening: current_q_item_offset "
                            + text(current_q_item_offset) + " > "
                            + text(self.current_q_total_items)
                            + " current_q_total_items. HTML:" + "\n" + text(html))
        else:
            self.query_done = 0

    self.current_q_item_offset = current_q_item_offset
    self.current_q_total_items = current_q_total_items

    if len(symbols) == 0:
        self.current_page_retries += 1
        # Related to issue #4, see
        # https://github.com/Benny-/Yahoo-ticker-symbol-downloader/issues/4#issuecomment-51718922
        # Yahoo sometimes gives a "bad" page. There is no way we can determine
        # if we are at the end of pagination or if we happen to get a bad page
        # a few times in a row. So we simply request the page a lot of times.
        # At some point we are fairly certain we are at the end of pagination.
        if self.current_page_retries > 20:
            self.query_done = self.query_done + self.query_done_max
    else:
        self.current_page_retries = 0

    if self.query_done >= self.query_done_max:
        if self._getQueryIndex() + 1 >= len(self.queries):
            self.done = True
        else:
            self.done = False

    return symbols

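# The query_done counter above acts as a debounce: only after query_done_max
# consecutive "looks finished" signals does the downloader move on to the
# next query. A tiny standalone demonstration of that behaviour, with
# illustrative values only:
query_done, query_done_max = 0, 2

for looks_finished in [True, False, True, True]:
    if looks_finished:
        query_done += 1
    else:
        query_done = 0   # any contradicting page resets the count
    print(query_done >= query_done_max)
# -> False, False, False, True: one bad page in between restarts the count.
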
def main():
    downloader = None

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--insecure", help="use HTTP instead of HTTPS", action="store_true")
    parser.add_argument("-e", "--export", help="export immediately without downloading (only useful if you already downloaded something to the .pickle file)", action="store_true")
    parser.add_argument('-E', '--Exchange', help='Only export ticker symbols from this exchange (the filtering is done during the export phase)')
    parser.add_argument('type', help='The type to download, this can be: ' + " ".join(list(options.keys())))
    parser.add_argument("-s", "--sleep", help="The time to sleep in seconds between requests", type=float, default=0)
    parser.add_argument("-p", "--pandantic", help="Stop and warn the user if some rare assertion fails", action="store_true")
    parser.add_argument("-m", "--market", help="Specify the region of queried exchanges (us = USA+Canada, dr = Germany, fr = France, hk = Hong Kong, gb = United Kingdom, default = all)", default="all")
    args = parser.parse_args()

    if args.insecure:
        print("Using insecure connection")

    if args.export:
        print("Exporting pickle file")

    tickerType = args.type = args.type.lower()
    market = args.market = args.market.lower()

    print("Checking if we can resume an old download session")
    try:
        downloader = loadDownloader(tickerType)
        print("Downloader found on disk, resuming")
    except:
        print("No old downloader found on disk")
        print("Starting a new session")
        if tickerType not in options:
            print("Error: " + tickerType + " is not a valid type option. See --help")
            exit(1)
        else:
            downloader = options[tickerType]

    try:
        if not args.export:
            if not downloader.isDone():
                print("Downloading " + downloader.type)
                print("")
                downloadEverything(downloader, tickerType, args.insecure, args.sleep, args.pandantic, market)
                print("Saving downloader to disk...")
                saveDownloader(downloader, tickerType)
                print("Downloader successfully saved.")
                print("")
            else:
                print("The downloader has already finished downloading everything")
                print("")

    except Exception as ex:
        print("An exception occurred while downloading. Suspending downloader to disk")
        saveDownloader(downloader, tickerType)
        print("Successfully saved download state")
        print("Try removing {type}.pickle file if this error persists")
        print("Issues can be reported on https://github.com/Benny-/Yahoo-ticker-symbol-downloader/issues")
        print("")
        raise
    except KeyboardInterrupt as ex:
        print("Suspending downloader to disk as .pickle file")
        saveDownloader(downloader, tickerType)

    if downloader.isDone() or args.export:
        print("Exporting " + downloader.type + " symbols")

        data = tablib.Dataset()
        data.headers = downloader.getRowHeader()

        for symbol in downloader.getCollectedSymbols():
            if args.Exchange is None:
                data.append(symbol.getRow())
            elif symbol.exchange == args.Exchange:
                data.append(symbol.getRow())

        with io.open(downloader.type + '.csv', 'w', encoding='utf-8') as f:
            f.write(text.join(u',', data.headers) + '\n')
            writer = csv.writer(f)
            for i in range(0, len(data)):
                row = [text(y) if y is not None else u"" for y in data[i]]
                writer.writerow(row)

        with open(downloader.type + '.xlsx', 'wb') as f:
            f.write(data.xlsx)

        with open(downloader.type + '.json', 'wb') as f:
            f.write(data.json.encode('UTF-8'))

        with open(downloader.type + '.yaml', 'wb') as f:
            f.write(data.yaml.encode('UTF-8'))

def print_symbol(symbol):
    try:
        print(" " + text(symbol))
    except:
        print(" Could not display some ticker symbols due to char encoding")

def main():
    downloader = None

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--insecure", help="use HTTP instead of HTTPS", action="store_true")
    parser.add_argument("-e", "--export", help="export immediately without downloading (only useful if you already downloaded something to the .pickle file)", action="store_true")
    parser.add_argument('-E', '--Exchange', help='Only export ticker symbols from this exchange (the filtering is done during the export phase)')
    parser.add_argument('type', nargs='?', default='generic', help='The type to download, this can be: ' + " ".join(list(options.keys())))
    parser.add_argument("-s", "--sleep", help="The time to sleep in seconds between requests", type=float, default=0)
    parser.add_argument("-p", "--pandantic", help="Stop and warn the user if some rare assertion fails", action="store_true")
    args = parser.parse_args()

    protocol = 'http' if args.insecure else 'https'

    if args.insecure:
        print("Using insecure connection")

    if args.export:
        print("Exporting pickle file")

    tickerType = args.type = args.type.lower()

    print("Checking if we can resume an old download session")
    try:
        downloader = loadDownloader(tickerType)
        print("Downloader found on disk, resuming")
    except:
        print("No old downloader found on disk")
        print("Starting a new session")
        if tickerType not in options:
            print("Error: " + tickerType + " is not a valid type option. See --help")
            exit(1)
        else:
            downloader = options[tickerType]

    rp = robotparser.RobotFileParser()
    rp.set_url(protocol + '://finance.yahoo.com/robots.txt')
    rp.read()
    try:
        if not args.export:
            if not rp.can_fetch(user_agent, protocol + '://finance.yahoo.com/_finance_doubledown/api/resource/searchassist'):
                print('Execution of script halted due to robots.txt')
                return 1

            if not downloader.isDone():
                print("Downloading " + downloader.type)
                print("")
                downloadEverything(downloader, tickerType, args.insecure, args.sleep, args.pandantic)
                print("Saving downloader to disk...")
                saveDownloader(downloader, tickerType)
                print("Downloader successfully saved.")
                print("")
            else:
                print("The downloader has already finished downloading everything")
                print("")

    except Exception as ex:
        print("An exception occurred while downloading. Suspending downloader to disk")
        saveDownloader(downloader, tickerType)
        print("Successfully saved download state")
        print("Try removing {type}.pickle file if this error persists")
        print("Issues can be reported on https://github.com/Benny-/Yahoo-ticker-symbol-downloader/issues")
        print("")
        raise
    except KeyboardInterrupt as ex:
        print("Suspending downloader to disk as .pickle file")
        saveDownloader(downloader, tickerType)

    if downloader.isDone() or args.export:
        print("Exporting " + downloader.type + " symbols")

        data = tablib.Dataset()
        data.headers = downloader.getRowHeader()

        for symbol in downloader.getCollectedSymbols():
            if args.Exchange is None:
                data.append(symbol.getRow())
            elif symbol.exchange == args.Exchange:
                data.append(symbol.getRow())

        with io.open(downloader.type + '.csv', 'w', encoding='utf-8') as f:
            f.write(text.join(u',', data.headers) + '\n')
            writer = csv.writer(f)
            for i in range(0, len(data)):
                row = [text(y) if y is not None else u"" for y in data[i]]
                writer.writerow(row)

        try:
            with open(downloader.type + '.xlsx', 'wb') as f:
                f.write(data.xlsx)
        except:
            print("Could not export .xlsx due to an internal error")

        try:
            with open(downloader.type + '.json', 'wb') as f:
                f.write(data.json.encode('UTF-8'))
        except:
            print("Could not export .json due to an internal error")

        try:
            with open(downloader.type + '.yaml', 'wb') as f:
                f.write(data.yaml.encode('UTF-8'))
        except:
            print("Could not export .yaml due to an internal error")