Example #1
def download_monthly_klines(symbols, num_symbols, intervals, years, months,
                            checksum):
    current = 0
    print("Found {} symbols".format(num_symbols))

    for symbol in symbols:
        print("[{}/{}] - start download monthly {} klines ".format(
            current + 1, num_symbols, symbol))
        for interval in intervals:
            for year in years:
                for month in months:
                    path = "data/spot/monthly/klines/{}/{}/".format(
                        symbol.upper(), interval)
                    file_name = "{}-{}-{}-{}.zip".format(
                        symbol.upper(), interval, year, '{:02d}'.format(month))
                    download_file(path, file_name)

                    if checksum == 1:
                        checksum_path = "data/spot/monthly/klines/{}/{}/".format(
                            symbol.upper(), interval)
                        checksum_file_name = "{}-{}-{}-{}.zip.CHECKSUM".format(
                            symbol.upper(), interval, year,
                            '{:02d}'.format(month))
                        download_file(checksum_path, checksum_file_name)

        current += 1
Example #2
def download_daily_aggTrades(trading_type, symbols, num_symbols, dates,
                             start_date, end_date, folder, checksum):
    current = 0
    date_range = None

    if start_date and end_date:
        date_range = start_date + " " + end_date

    if not start_date:
        start_date = START_DATE
    else:
        start_date = convert_to_date_object(start_date)

    if not end_date:
        end_date = END_DATE
    else:
        end_date = convert_to_date_object(end_date)

    print("Found {} symbols".format(num_symbols))

    for symbol in symbols:
        print("[{}/{}] - start download daily {} aggTrades ".format(
            current + 1, num_symbols, symbol))
        for date in dates:
            current_date = convert_to_date_object(date)
            if current_date >= start_date and current_date <= end_date:
                path = get_path(trading_type, "aggTrades", "daily", symbol)
                file_name = "{}-aggTrades-{}.zip".format(symbol.upper(), date)
                download_file(path, file_name, date_range, folder)

                if checksum == 1:
                    checksum_path = get_path(trading_type, "aggTrades",
                                             "daily", symbol)
                    checksum_file_name = "{}-aggTrades-{}.zip.CHECKSUM".format(
                        symbol.upper(), date)
                    download_file(checksum_path, checksum_file_name,
                                  date_range, folder)

        current += 1
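
All of these Binance market-data examples delegate to an external download_file helper that is not shown. A minimal sketch of a compatible implementation, assuming the public host https://data.binance.vision/ and inferring the (base_path, file_name, date_range, folder) signature from the call sites; this is an illustration, not the project's actual code:

import os
import urllib.error
import urllib.request

BASE_URL = "https://data.binance.vision/"  # assumed public data host

def download_file(base_path, file_name, date_range=None, folder=None):
    # mirror the remote directory layout locally, optionally under `folder`
    save_dir = folder if folder else os.getcwd()
    if date_range:
        # group ranged downloads into their own subdirectory
        save_dir = os.path.join(save_dir, date_range.replace(" ", "_"))
    os.makedirs(os.path.join(save_dir, base_path), exist_ok=True)
    save_path = os.path.join(save_dir, base_path, file_name)
    if os.path.exists(save_path):
        print("file already exists: {}".format(save_path))
        return
    try:
        urllib.request.urlretrieve(BASE_URL + base_path + file_name, save_path)
        print("downloaded: {}".format(save_path))
    except urllib.error.HTTPError:
        print("file not found: {}".format(BASE_URL + base_path + file_name))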
Example #3
def analysis_deep_n(deep, gene, gene_hsa, pathway_this_gene, path, occu):
    # fetch the pathway's KGML from the KEGG REST API into the local cache
    download_file('http://rest.kegg.jp/get/' + pathway_this_gene + '/kgml',
                  os.path.join(os.getcwd(), 'database', 'pathways', 'xml'),
                  pathway_this_gene + '.xml.gz')

    list_rows = read_kgml(deep, pathway_this_gene, gene, gene_hsa, path, occu)

    return list_rows
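
Note that this example's download_file takes (url, destination_directory, file_name), a different signature from the Binance helpers above. A hypothetical call with illustrative KEGG identifiers (hsa:672 is BRCA1, hsa05224 is the breast cancer pathway):

rows = analysis_deep_n(deep=2, gene='BRCA1', gene_hsa='hsa:672',
                       pathway_this_gene='hsa05224', path='BRCA1', occu=1)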
Example #4
    def install_dependency(self, dependencyName, version, url, installDirectoryRelPath):
        savePath = utility.download_file(url, self.download_directory)
        utility.clear_directory_contents(self.extraction_directory)

        utility.extract_file(savePath, self.extraction_directory)
        os.remove(savePath)

        if self.installedDependencies.is_installed(dependencyName):
            self.remove_dependency(dependencyName)

        # not sure whether to add this or not (can have a serious impact)
        #if os.path.exists(installDirectory):
        #    utility.log("installation directory {i} for dependency {d} already exist, overwriting it...".format(i=installDirectory,d=dependencyName))
        #    shutil.rmtree(installDirectory)

        installDirectory = utility.joinPaths(self.dependencies_directory, installDirectoryRelPath)        
        utility.ensure_directory(installDirectory)

        # if the archive top level contains only one directory, copy its contents (not the directory itself)
        tempDirContents = os.listdir(self.extraction_directory)
        if len(tempDirContents) == 1 and os.path.isdir(utility.joinPaths(self.extraction_directory, tempDirContents[0])):
            dirPath = utility.joinPaths(self.extraction_directory, tempDirContents[0])
            utility.move_directory_contents(dirPath, installDirectory)
            os.rmdir(dirPath)
        else:
            utility.move_directory_contents(self.extraction_directory, installDirectory)

        self.installedDependencies.add_dependency(dependencyName, version, installDirectoryRelPath)
        return True
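
utility here is the project's own helper module. As one example, a plausible sketch of move_directory_contents, assumed rather than taken from the project:

import os
import shutil

def move_directory_contents(src_dir, dst_dir):
    # move every entry out of src_dir into dst_dir, leaving src_dir empty
    for name in os.listdir(src_dir):
        shutil.move(os.path.join(src_dir, name), os.path.join(dst_dir, name))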
Example #5
def download_monthly_klines(symbols, num_symbols, intervals, years, months,
                            start_date, end_date, folder, checksum):
    current = 0
    date_range = None

    if start_date and end_date:
        date_range = start_date + " " + end_date

    if not start_date:
        start_date = START_DATE
    else:
        start_date = convert_to_date_object(start_date)

    if not end_date:
        end_date = END_DATE
    else:
        end_date = convert_to_date_object(end_date)

    print("Found {} symbols".format(num_symbols))

    for symbol in symbols:
        print("[{}/{}] - start download monthly {} klines ".format(
            current + 1, num_symbols, symbol))
        for interval in intervals:
            for year in years:
                for month in months:
                    current_date = convert_to_date_object('{}-{}-01'.format(
                        year, month))
                    if current_date >= start_date and current_date <= end_date:
                        path = "data/spot/monthly/klines/{}/{}/".format(
                            symbol.upper(), interval)
                        file_name = "{}-{}-{}-{}.zip".format(
                            symbol.upper(), interval, year,
                            '{:02d}'.format(month))
                        download_file(path, file_name, date_range, folder)

                        if checksum == 1:
                            checksum_path = "data/spot/monthly/klines/{}/{}/".format(
                                symbol.upper(), interval)
                            checksum_file_name = "{}-{}-{}-{}.zip.CHECKSUM".format(
                                symbol.upper(), interval, year,
                                '{:02d}'.format(month))
                            download_file(checksum_path, checksum_file_name,
                                          date_range, folder)

        current += 1
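
START_DATE, END_DATE, and convert_to_date_object are module-level helpers not shown in this listing. A minimal sketch consistent with how they are used here, assuming dates arrive as YYYY-MM-DD strings:

from datetime import date, datetime

START_DATE = date(2017, 1, 1)       # assumed earliest archive date
END_DATE = datetime.now().date()    # assumed default upper bound: today

def convert_to_date_object(date_str):
    # parse a YYYY-MM-DD string into a comparable datetime.date
    return datetime.strptime(date_str, '%Y-%m-%d').date()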
Example #6
def download_daily_klines(symbols, num_symbols, intervals, dates, start_date,
                          end_date, folder, checksum):
    current = 0
    date_range = None

    if start_date and end_date:
        date_range = start_date + " " + end_date

    if not start_date:
        start_date = START_DATE
    else:
        start_date = convert_to_date_object(start_date)

    if not end_date:
        end_date = END_DATE
    else:
        end_date = convert_to_date_object(end_date)

    # keep only the intervals that are valid for daily downloads
    intervals = list(set(intervals) & set(DAILY_INTERVALS))
    print("Found {} symbols".format(num_symbols))

    for symbol in symbols:
        print("[{}/{}] - start download daily {} klines ".format(
            current + 1, num_symbols, symbol))
        for interval in intervals:
            for date in dates:
                current_date = convert_to_date_object(date)
                if current_date >= start_date and current_date <= end_date:
                    path = "data/spot/daily/klines/{}/{}/".format(
                        symbol.upper(), interval)
                    file_name = "{}-{}-{}.zip".format(symbol.upper(), interval,
                                                      date)
                    download_file(path, file_name, date_range, folder)

                    if checksum == 1:
                        checksum_path = "data/spot/daily/klines/{}/{}/".format(
                            symbol.upper(), interval)
                        checksum_file_name = "{}-{}-{}.zip.CHECKSUM".format(
                            symbol.upper(), interval, date)
                        download_file(checksum_path, checksum_file_name,
                                      date_range, folder)

        current += 1
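
DAILY_INTERVALS is an external constant used to drop intervals that have no daily archives. A plausible definition, with the exact membership being an assumption:

DAILY_INTERVALS = ["1m", "3m", "5m", "15m", "30m",
                   "1h", "2h", "4h", "6h", "8h", "12h", "1d"]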
Example #7
def download_daily_aggTrades(symbols, num_symbols, dates, checksum):
    current = 0
    print("Found {} symbols".format(num_symbols))

    for symbol in symbols:
        print("[{}/{}] - start download daily {} aggTrades ".format(
            current + 1, num_symbols, symbol))
        for date in dates:
            path = "data/spot/daily/aggTrades/{}/".format(symbol.upper())
            file_name = "{}-aggTrades-{}.zip".format(symbol.upper(), date)
            download_file(path, file_name)

            if checksum == 1:
                checksum_path = "data/spot/daily/aggTrades/{}/".format(
                    symbol.upper())
                checksum_file_name = "{}-aggTrades-{}.zip.CHECKSUM".format(
                    symbol.upper(), date)
                download_file(checksum_path, checksum_file_name)

        current += 1
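
A hypothetical invocation of this variant, with illustrative symbols and dates:

symbols = ["BTCUSDT", "ETHUSDT"]
dates = ["2021-01-01", "2021-01-02"]
download_daily_aggTrades(symbols, len(symbols), dates, checksum=1)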
Example #8
def cmd_download(args):
    """ downloading one or more packages without monitoring them"""
    downloadDirectory = utility.joinPaths(os.getcwd(), args.directory)
    # split "name@version" specs; default to "latest" when no version is given
    packages = [p.split('@') if '@' in p else [p, "latest"] for p in args.packages]
    utility.ensure_directory(downloadDirectory)

    registryClient = get_registry_client()
    if not registryClient:
        raise Exception("registry server is not set, please set it using the set-registry-server command")
    
    repositoryClient = get_repository_client()

    for name, version in packages:
        try:
            package_handler = registryClient.get_package_details(name)
        except Exception as e:
            utility.log(str(e))
            continue

        if version == 'latest':
            version = get_latest_version(package_handler.get_package_versions())
            if version == '0.0':
                utility.log("Package {p} is not in the ppm registry".format(p=name))
                continue
        else:
            version = str(StrictVersion(version))
            if not package_handler.check_version_existence(version):
                utility.log("Package {p} is not in the ppm registry".format(p=name))
                continue

        url = package_handler.get_package_url(version)
        # check for repository url
        if repositoryClient:
            repository_url = repositoryClient.get_package_repository_url(url)
            if repository_url:
                url = repository_url
        utility.download_file(url, downloadDirectory)
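
get_latest_version is defined elsewhere in the project; since the caller treats '0.0' as "not in the registry", a minimal sketch (an assumption, not the project's actual code):

from distutils.version import StrictVersion

def get_latest_version(versions):
    # highest registered version, or the '0.0' sentinel when none exist
    if not versions:
        return '0.0'
    return str(max(versions, key=StrictVersion))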
Example #9
def download_daily_klines(symbols, num_symbols, intervals, dates, checksum):
    current = 0
    # keep only the intervals that are valid for daily downloads
    intervals = list(set(intervals) & set(DAILY_INTERVALS))
    print("Found {} symbols".format(num_symbols))
    for symbol in symbols:
        print("[{}/{}] - start download daily {} klines ".format(
            current + 1, num_symbols, symbol))
        for interval in intervals:
            for date in dates:
                path = "data/spot/daily/klines/{}/{}/".format(
                    symbol.upper(), interval)
                file_name = "{}-{}-{}.zip".format(symbol.upper(), interval,
                                                  date)
                download_file(path, file_name)

                if checksum == 1:
                    checksum_path = "data/spot/daily/klines/{}/{}/".format(
                        symbol.upper(), interval)
                    checksum_file_name = "{}-{}-{}.zip.CHECKSUM".format(
                        symbol.upper(), interval, date)
                    download_file(checksum_path, checksum_file_name)

        current += 1
Example #10
def run_analysis(starting_depth):
    for deep in range(starting_depth, gl.deep_input + 1):
        if deep == 1:
            # download initial pathway
            download_file(
                'http://rest.kegg.jp/get/' + gl.pathway_input + '/kgml',
                os.path.join(os.getcwd(), 'database', 'pathways', 'xml'),
                gl.pathway_input + '.xml.gz')

            # get the initial gene's hsa id and KEGG url from its name
            hsa_finded, url_finded = get_info_gene_initial(
                gl.pathway_input, gl.gene_input)

            # set global variables
            gl.gene_input_hsa = hsa_finded
            gl.gene_input_url = url_finded

            # read the initial pathway, then create and add its genes to the csv
            list_rows_df_returned = read_kgml(deep, gl.pathway_input,
                                              gl.gene_input, hsa_finded,
                                              gl.gene_input, 1)

            # add the genes found to the dataframe
            unified([list_rows_df_returned])

            # retrieve the other pathways that reference the initial pathway
            list_pathways_this_gene = download_read_html(url_finded)

            # The pathway set as input from the config file is removed
            if gl.pathway_input in list_pathways_this_gene:
                list_pathways_this_gene.remove(gl.pathway_input)

            if len(list_pathways_this_gene) > 0:
                # process a single gene on each available CPU
                list_rows_df_returned = Parallel(n_jobs=gl.num_cores_input)(
                    delayed(
                        analysis_deep_n)(deep, gl.gene_input, hsa_finded,
                                         pathway_this_gene, gl.gene_input, 1)
                    for pathway_this_gene in set_progress_bar(
                        '[Deep: %d]' % deep, str(len(list_pathways_this_gene)))
                    (list_pathways_this_gene))

                unified(list_rows_df_returned)
            else:
                print('[Deep: 1] Only directly connected genes were found')
        else:
            # Retrieve the genes found at depth-1, avoiding the input gene
            df_genes_resulted = (
                gl.DF_TREE[(gl.DF_TREE['deep'] == deep - 1)
                           & (gl.DF_TREE['name_son'] != gl.gene_input)])

            for index, row in set_progress_bar(
                    '[Deep: %d]' % deep, str(df_genes_resulted.shape[0]))(
                        df_genes_resulted.iterrows()):

                # Retrieve the list of pathways for the gene passed as input
                list_pathways_this_gene = download_read_html(
                    row['url_kegg_son'])

                # The pathway set as input from the config file is removed, so as to avoid an endless loop
                # if gl.pathway_input in list_pathways_this_gene:
                #    list_pathways_this_gene.remove(gl.pathway_input)

                # process a single gene on each available CPU
                list_rows_df_returned = Parallel(n_jobs=gl.num_cores_input)(
                    delayed(analysis_deep_n)
                    (deep, row['name_son'], row['hsa_son'], pathway_this_gene,
                     row['fullpath'], row['occurrences'])
                    for pathway_this_gene in list_pathways_this_gene)

                unified(list_rows_df_returned)

        # ----- START DROP DUPLICATES -----

        # Duplicates at the same level are extracted and sorted alphabetically
        df_genes_this_level = (gl.DF_TREE[gl.DF_TREE['deep'] == deep])
        df_duplicated_filtered = df_genes_this_level[
            df_genes_this_level.duplicated(subset=['name_son'],
                                           keep=False)].sort_values('name_son')

        # Recover the names of the duplicated genes
        list_name_genes_duplicated = df_duplicated_filtered.name_son.unique()

        # process single gene on each CPUs available
        list_rows_to_do_df_returned = Parallel(n_jobs=gl.num_cores_input)(
            delayed(get_info_row_duplicated)(df_duplicated_filtered,
                                             gene_duplicate)
            for gene_duplicate in list_name_genes_duplicated)

        # Update the occurrence counts of the found links and delete the duplicates
        clean_update_row_duplicates(list_rows_to_do_df_returned)

        gl.DF_TREE = gl.DF_TREE[(gl.DF_TREE['deep'] == deep)]

        # export in csv per deep
        export_data_for_deep(deep)

        # Row indexes are reset, because they are no longer sequential due to the elimination of duplicates
        gl.DF_TREE = gl.DF_TREE.reset_index(drop=True)
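
Parallel and delayed above follow joblib's fan-out pattern: delayed(f)(args) captures a call without running it, and Parallel executes the captured calls across workers, returning results in input order. A self-contained illustration:

from joblib import Parallel, delayed

def square(x):
    return x * x

# each captured call runs in a worker; results come back as an ordered list
results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]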
Example #11
    def _download(self, url_key):
        download_file(get_url(url_key), get_file_name(url_key))