コード例 #1
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def validate(self):
        """Validate the metadata before they are uploaded.

        Three checks are run in order: the minimal set of metadata
        (``_check_minimal``), the license (``_check_license_availability``)
        and a ``jsonschema`` validation of the metadata against the schema.

        Dependencies between metadata have to be verified because the value
        of one metadata can imply the presence of another one.  For example,
        if *upload_type* (a mandatory metadata) has the value *publication*,
        the metadata *publication_type* has to be present.

        Exception
        ---------
        ZenodoMetadataException
            raise if the metadata do not validate against the schema.
        """
        # Minimal set of information first, then the license
        # (only relevant when access is open or embargoed)
        self._check_minimal()
        self._check_license_availability()

        try:
            jsonschema.validate(self._metadata, self._schema)
        except jsonschema.exceptions.ValidationError as invalid:
            reason = 'ValidationError: {}'.format(invalid.message)
            logger.error(reason)
            raise ZenodoMetadataException(reason)
        else:
            logger.info('Metadata should be ok to use for upload')
コード例 #2
0
ファイル: datalight.py プロジェクト: gruel/datalight
def get_files_path(fname):
    """Function to get the path(s) of the file(s)

    Parameters
    ----------
    fname: str
        Name of the file to get the path or the directory to list

    Returns
    -------
    list of str
        ``[fname]`` when *fname* is a file, otherwise the path of every file
        found while walking the directory *fname* and its subdirectories.

    Raises
    ------
    DatalightException
        if no file is found (*fname* does not exist or is a directory
        which does not contain any file).
    """
    # A single file: nothing to crawl
    if os.path.isfile(fname):
        return [fname]

    # Crawl through the directory and its subdirectories
    file_paths = []
    for root, _, files in os.walk(fname):
        file_paths.extend(os.path.join(root, filename) for filename in files)

    if not file_paths:
        message = 'File or directory: {} to upload does not exist.'.format(
            fname)
        logger.error(message)
        raise DatalightException(message)

    return file_paths
コード例 #3
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def _check_minimal(self):
        """Check that the minimal set of metadata needed by Zenodo is present.

        *access_right* and *license* are set to their default values
        (``open`` and ``cc-by-4.0``) when they are absent, and a warning is
        logged for each default applied.

        Return
        ------
        bool
            ``True`` when the check passes.

        Exception
        ---------
        ZenodoMetadataException
            raise if no metadata are provided or if one of *title*,
            *upload_type*, *description* or *creators* is missing.
        """
        metadata = self._metadata

        if metadata is None:
            message = 'Metadata not provided'
            logger.error(message)
            raise ZenodoMetadataException(message)

        required = ('title', 'upload_type', 'description', 'creators')

        # Report the first mandatory key which is absent (if any)
        absent = next((key for key in required
                       if key not in metadata.keys()), None)
        if absent is not None:
            error = 'Missing metadata information: {}'.format(absent)
            logger.error(error)
            raise ZenodoMetadataException(error)

        # TODO: the default values should be handled by the schema
        if 'access_right' not in metadata:
            metadata['access_right'] = 'open'
            logger.warning(
                'Add metadata: "access_right" set to default value "open"')

        if 'license' not in metadata:
            metadata['license'] = 'cc-by-4.0'
            logger.warning(
                'Add metadata: "license" set to default value "cc-by-4.0"')

        return True
コード例 #4
0
    def _check_status_code(status_code):
        """Method to test that the request went as expected.

        Parameters
        ----------
        status_code: int
            status code return by the request (requests.status_code)

        Return
        ------
        status_code: int
            the status code received, when it is lower than 400.

        Exception
        ---------
        ZenodoException:
            Raise exception if the request ended with a problem, i.e. for
            a server error (status >= 500) as well as for any other
            status >= 400.
        """
        # Everything went as expected
        if status_code < 400:
            logger.debug(
                'Request succeed with status code: {}'.format(status_code))
            return status_code

        # Pick the wording matching the family of the error
        if status_code >= 500:
            failure = 'Server connection failed with error: {}'
        elif status_code >= 400:
            failure = 'Request failed with error: {}'
        else:
            # Value which compares to none of the thresholds: nothing to do
            return None

        message = failure.format(status_code)
        logger.error(message)
        raise ZenodoException(message)
コード例 #5
0
ファイル: tb_spide.py プロジェクト: chenghao/taobao_spide
	def get_field(self, item):
		"""
		Extract the fields of one result item and schedule their storage.

		The item URL is normalised with ``self.is_startswith`` and hashed with
		MD5. Only when ``exist_by_urlmd5`` returns ``False`` for that hash are
		the remaining fields read; the resulting record is then handed to
		``self.save`` through ``self.pool.spawn``. Any exception is logged and
		swallowed, nothing is raised to the caller.

		:param item: element exposing ``xpath`` (presumably an lxml element - confirm against caller)
		:return: None
		"""
		try:
			div = item.xpath(u'div[@class="pic-box J_MouseEneterLeave J_PicBox"]//*')[0]
			url = div.xpath(u'div[@class="pic"]//a')[0].attrib.get("href")  # item URL
			url = self.is_startswith(url)
			url_md5 = hashlib.md5(url).hexdigest()
			bo = exist_by_urlmd5(url_md5)
			if bo is False:  # only insert when not already in the database
				img = div.xpath(u'div[@class="pic"]//img')[0].attrib
				cover = img.get("src") if img.get("src") else img.get("data-src")  # cover image
				cover = self.is_startswith(cover)
				similars = div.xpath(u'div[@class="similars"]//a')
				if similars:
					same_style_url = similars[0].attrib.get("href")  # "same style" URL
					if same_style_url is None:
						same_style_url = ""
					else:
						same_style_url = "https://s.taobao.com" + same_style_url

					if len(similars) > 1:
						similar_url = similars[1].attrib.get("href")
						if similar_url is None:
							similar_url = ""
						else:
							similar_url = "https://s.taobao.com" + similar_url  # "similar items" URL
					else:
						similar_url = ""
				else:
					same_style_url = ""
					similar_url = ""

				div = item.xpath(u'div[@class="ctx-box J_MouseEneterLeave J_IconMoreNew"]/div')
				price = div[0].xpath(u'div[@class="price g_price g_price-highlight"]/strong')[0].text  # item price
				sale_num = div[0].xpath(u'div[@class="deal-cnt"]')[0].text
				if sale_num is None:
					sale_num = 0
				else:
					sale_num = "".join([s for s in sale_num if s.isdigit()])  # number of buyers (digits only)
				title_a = etree.tounicode(div[1].xpath(u'a')[0])  # item name
				p = re.compile('<[^>]+>')  # strip the HTML tags, keep only the text
				title = p.sub("", title_a).strip()
				shop_name = div[2].xpath(u'div/a/span')[1].text  # shop name
				addr = div[2].xpath(u'div')[1].text  # shop address
				tianmao = div[3].xpath(u'div/ul/li//span[@class="icon-service-tianmao"]')
				is_tmall = 1 if tianmao else 0  # whether the shop is a Tmall shop

				data = {"url": url, "title": title, "cover": cover, "price": price, "sale_num": sale_num,
				        "shop_name": shop_name, "addr": addr, "is_tmall": is_tmall, "url_md5": url_md5,
				        "same_style_url": same_style_url, "similar_url": similar_url}
				self.pool.spawn(self.save, data)
		except Exception, e:
			logger.error("获取字段异常: " + str(e), exc_info=True)
コード例 #6
0
ファイル: tb_spide.py プロジェクト: chenghao/taobao_spide
	def save(self, data):
		"""
		Store one record in the database.

		The record is passed unchanged to ``save_tb``; any exception raised
		there is logged with its traceback and swallowed.

		:param data: record to store
		:return: None
		"""
		try:
			save_tb(data)
		except Exception as err:
			logger.error("保存到数据库异常: " + str(err), exc_info=True)
コード例 #7
0
ファイル: tb_spide.py プロジェクト: chenghao/taobao_spide
	def req_url(self, url):
		"""
		Load a URL in the browser and schedule the parsing of the page.

		The page source is decoded as UTF-8 (undecodable bytes are ignored)
		and handed to ``self.parse_html`` through ``self.pool.spawn``. Any
		exception is logged with its traceback and swallowed.

		:param url: address to load
		:return: None
		"""
		try:
			browser = self.browser
			browser.get(url)
			html = browser.page_source.decode('utf-8', 'ignore')
			self.pool.spawn(self.parse_html, html)
		except Exception as err:
			logger.error("请求url异常: " + str(err), exc_info=True)
コード例 #8
0
    def _verify_token(self):
        """Check that a Zenodo token has been defined.

        Only the presence of the token is tested here, not whether it is
        accepted by Zenodo.

        Exception
        ---------
        ZenodoException
            if token not define (token = None).

        """
        if self.token is not None:
            return

        message = 'No Zenodo token provided'
        logger.error(message)
        raise ZenodoException(message)
コード例 #9
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def set_schema(self, schema):
        """Set, or extend, the schema used to validate the metadata.

        Parameters
        ----------
        schema: str or dict
            Either the name of a schema file, which is read with
            ``_read_schema`` and replaces the current schema, or a
            dictionary.  A dictionary becomes the schema when none is set
            yet, otherwise it is merged into the current one.

        Exception
        ---------
        ZenodoMetadataException
            raise if *schema* is neither a string nor a dictionary.
        """
        # isinstance (rather than an exact type comparison) so that
        # subclasses such as OrderedDict are accepted as well
        if isinstance(schema, str):
            logger.info('Schema file use: {}'.format(schema))
            self._schema = self._read_schema(schema)
        elif isinstance(schema, dict):
            logger.info('Schema provided through dictionary object')
            if self._schema is None:
                self._schema = schema
            else:
                self._schema.update(schema)
        else:
            message = 'Something is wrong with the schema: {}.'.format(schema)
            logger.error(message)
            raise ZenodoMetadataException(message)
コード例 #10
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def _read_metadata(fmetadata):
        """Method to read Zenodo metadata file

        Parameter
        ---------
        fmetadata: str
            Name of the (YAML) file which contains the metadata

        Return
        ------
        _metadata: dict
            content of the file, the identifier of each community (if any)
            being converted to lower case.  ``None`` if the file is empty.

        Exception
        ---------
        ZenodoMetadataException
            raise if the file does not exist.
        """
        logger.info('Read metadata from: {}'.format(fmetadata))
        try:
            with open(fmetadata) as f:
                # safe_load replaces yaml.load: the latter can build
                # arbitrary Python objects from the file and requires an
                # explicit Loader argument with recent PyYAML versions.
                _metadata = yaml.safe_load(f)
        except FileNotFoundError as err:
            message = 'Metadata file not founded: {}'.format(fmetadata)
            logger.error(message)
            raise ZenodoMetadataException(message) from err

        # change communities identifier in lower case (only format accepted
        # by zenodo).  An empty file is loaded as None, hence the type check.
        if isinstance(_metadata, dict) and 'communities' in _metadata:
            for _com in _metadata['communities']:
                _com['identifier'] = _com['identifier'].lower()

        return _metadata
コード例 #11
0
    def delete(self, _id=None):
        """Method to delete deposition.

        Parameters
        ----------
        _id: int
            deposition id of the record to delete.  When provided it is
            also stored in ``self.deposition_id``; when omitted the current
            ``self.deposition_id`` is used.

        .. note::
            it worked only if it is not publish.

        Exception
        ---------
        ZenodoException
            raise if token not define (token = None) or if connection
            return status >= 400
        """
        # Test if token was defined
        self._verify_token()

        # Use the provided id if not None, otherwise keep self.deposition_id
        if _id is not None:
            self.deposition_id = _id

        # Create the request url
        request_url = self.depositions_url + '/{}'.format(self.deposition_id)
        logger.info('Delete url: {}'.format(request_url))

        request = requests.delete(request_url,
                                  params={'access_token': self.token})
        self.status_code = request.status_code
        logger.debug('Status code: {}'.format(self.status_code))

        # Report the failure directly instead of raising and catching an
        # intermediate exception, which produced a chained traceback
        if self.status_code >= 400:
            message = 'Request_url does not exist or bad token. ' \
                      'Error: {}'.format(self.status_code)
            logger.error(message)
            raise ZenodoException(message)
コード例 #12
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def _get_opendefinition_file():
        """Download the definition file of the open licenses.

        The file is fetched from the Open Definition licenses service and
        decoded as JSON.

        Return
        ------
        licenses: dict
            the decoded content of the definition file.

        Exception
        ---------
        ZenodoMetadataException
            raise if the url cannot be opened.
        """
        url = 'https://licenses.opendefinition.org/licenses/groups/all.json'
        try:
            with urllib.request.urlopen(url) as response:
                definitions = json.load(response)
                logger.info(
                    'open licenses file use for validation: {}'.format(url))
            return definitions
        except urllib.error.URLError:
            message = ('Not possible to access to the list '
                       '(internet connection problem?): {}'.format(url))
            logger.error(message)
            raise ZenodoMetadataException(message)
コード例 #13
0
    def get_deposition_id(self):
        """Method to obtain the deposition id need to upload documents to Zenodo

        An empty deposition is created with a POST request on
        ``self.depositions_url`` and the id returned is stored.

        Attributes
        ----------
        deposition_id: int
            Deposition id gave by Zenodo deposition api to be used to upload
            files and metadata.

        Exception
        ---------
        ZenodoException
            raise if token not define (token = None) or if connection
            return status >= 400
        """
        headers = {'Content-Type': 'application/json'}

        # Test if Token defined and access zenodo to test the token if exist
        self._verify_token()

        request = requests.post(self.depositions_url,
                                params={'access_token': self.token},
                                json={},
                                headers=headers)
        self.status_code = request.status_code
        logger.debug('Status code: {}'.format(self.status_code))
        logger.debug('deposition url: {}'.format(self.depositions_url))

        # Test that the request succeed
        if self.status_code >= 400:
            message = ('Deposition id not obtain, '
                       'error: {}'.format(self.status_code))
            logger.error(message)
            raise ZenodoException(message)

        self.deposition_id = request.json()['id']
        logger.info('Deposition id: {}'.format(self.deposition_id))
        # Log the url of the deposition itself (it was the bare id before),
        # built the same way as the request url used to delete a deposition
        logger.info('Deposition url: {}/{}'.format(self.depositions_url,
                                                   self.deposition_id))
コード例 #14
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def _read_schema(fschema):
        """Method to read the schema.

        Parameter
        ---------
        fschema: str
            Name of the (YAML) file which contain the definition of the
            schema

        Return
        ------
        _schema: dict
            dictionary which contains the schema used to validate the metadata.

        Exception
        ---------
        ZenodoMetadataException
            raise if the file does not exist.
        """

        logger.info('Read schema from: {}'.format(fschema))
        try:
            with open(fschema) as f:
                # safe_load replaces yaml.load: the latter can build
                # arbitrary Python objects from the file and requires an
                # explicit Loader argument with recent PyYAML versions.
                _schema = yaml.safe_load(f)
        except FileNotFoundError as err:
            message = 'Schema file not founded: {}'.format(fschema)
            logger.error(message)
            raise ZenodoMetadataException(message) from err
        return _schema
コード例 #15
0
    def connection(self):
        """Test that the connection with the Zenodo website is working.

        A GET request is sent to ``self.depositions_url`` with the token and
        the status code received is stored in ``self.status_code``.  When
        the request is refused the token is reset to ``None``.

        Exception
        ---------
        ZenodoException
            raise if token not define (token = None) or if connection
            return status >= 400
        """
        # A token is needed before Zenodo can be asked to accept it
        self._verify_token()

        response = requests.get(self.depositions_url,
                                params={'access_token': self.token})
        self.status_code = response.status_code
        logger.debug('Status code: {}'.format(self.status_code))

        if self.status_code < 400:
            return

        # Error from Zenodo (status >= 400): the token is discarded
        message = ('Access token not accepted by Zenodo. '
                   'Error: {}'.format(self.status_code))
        logger.error(message)
        self.token = None
        raise ZenodoException(message)
コード例 #16
0
ファイル: main.py プロジェクト: zqtao2332/zqtao-learn-notes
async def get_phone(sem, id_):
    """Fetch the company page for *id_* and store its details in ``t_info``.

    Nothing is fetched when ``t_info`` already holds a row for *id_*.
    Otherwise the page is requested while holding *sem*, the info, phone
    and url blocks are read from it and inserted as a new row.

    :param sem: async context manager held for the duration of the request
        (presumably an ``asyncio.Semaphore`` limiting concurrency - confirm)
    :param id_: identifier of the page to fetch
    :return: ``True`` when the row already exists or was inserted,
        ``False`` when anything failed (the error is logged)
    """
    # NOTE(review): both statements are built by string interpolation and
    # the inserted values come straight from the fetched page; a quote in
    # the page text breaks (or alters) the statement.  Bound parameters
    # would be safer.
    sql = f'select count(1) from t_info where id_ = {id_}'
    already_stored = engine.execute(sql).fetchmany()[0][0]
    if already_stored != 0:
        return True

    url = f'http://lxbjs.baidu.com/cb/url/show?f=55&id={id_}'
    async with ClientSession() as session, sem:
        try:
            async with session.get(url, headers=headers,
                                   timeout=10) as response:
                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')
                info = soup.find('div', class_='cpy-info').text
                tel = soup.find('div', class_='cpy-info cpy-tel').text
                # url now holds the address shown on the page
                link_block = soup.find('div', class_='cpy-info cpy-url')
                url = link_block.find('a').text
                sql = f"insert into t_info values({id_}, '{info}', '{tel}', '{url}')"
                engine.execute(sql)
                logger.info(f'爬取信息成功: [{id_}] {info}')
            return True
        except Exception as e:
            logger.error(f'爬取信息失败: [{id_}] {type(e)}: {str(e)}')
            return False
コード例 #17
0
ファイル: datalight.py プロジェクト: gruel/datalight
def main(args=None):
    """Run datalight scripts to upload file on data repository

    Command line::

        Usage: datalight [-h | --help] <files>... (-m <metadata> | --metadata=<metadata>) [options]

        Options:

        -m FILE --metadata=FILE        File which contains the metadata information
        -z zipname --zipname=FILE      Name of the zip file which will be uploaded [default: data.zip]
        --nozip                        Do not create zip file containing the data to upload
        -r NAME --repository=NAME      Name of a data repository [default: zenodo]
        -p --publish                    If present publish the data
        -s --sandbox                   If present, datalight will use the sandbox data repository
        -k --keep                      Keep zip file created
        -h --help                      Print this help
        -v --version                   Print version of the software

        Examples:
            datalight file1 file2
            datalight directory --metadata=metadata.yml --repository=zenodo
            datalight file -m metadata.yml

    Raises
    ------
    SystemExit
        if the file or the folder to treat is not available.
    KeyError
        if no key found for the data repository wanted
    ImportError
        if the not possible to import the data repository wanted
    """
    # NOTE: the docstring above is the usage specification parsed by docopt
    # at run time; editing it changes the command line interface.

    # Read the arguments and option with docopt
    arguments = docopt(main.__doc__, argv=args, version=__version__)

    # Get the list of the files path to upload from all the files and/or
    # directories given on the command line
    files = []
    try:
        for fname in arguments['<files>']:
            files += get_files_path(fname)
    except DatalightException:
        logger.error('Problem with the files to upload.')
        # Failure status, like the metadata check below
        sys.exit(1)

    # option which will give the name of the metadata file
    metadata = arguments['--metadata']

    if not os.path.exists(metadata):
        logger.error('Metadata file: {} does not exist.'.format(metadata))
        sys.exit(1)

    # Choice of repository default Zenodo
    repository = arguments['--repository']
    if repository is None:
        repository = 'zenodo'

    # Only Zenodo is handled below: stop before any zip file is created
    # rather than failing later on an undefined repository class
    if repository != 'zenodo':
        message = 'Data repository: {} is not supported.'.format(repository)
        logger.error(message)
        raise KeyError(message)

    # If sandbox is present the version of the repository
    # used will be the sandbox one
    sandbox = arguments['--sandbox']

    # Directory given to the upload.  It has to be defined even when no
    # zip file is created (it was unbound with --nozip).
    # NOTE(review): assumes upload_files resolves the files relatively to
    # `path` - confirm against the repository class.
    directory = '.'

    # Zip data in an archive (to keep paths)
    if not arguments['--nozip']:
        zipname = arguments['--zipname']
        zipdata(files, zipname)

        # The zip file created is the only file to upload
        files = [zipname]

    try:
        from .zenodo import Zenodo as DataRepo
        from .zenodo import ZenodoException as DataRepoException
    except ImportError:
        from zenodo import Zenodo as DataRepo
        from zenodo import ZenodoException as DataRepoException

    # Read zenodo token file from home repository
    tokenfile = os.path.join(home, '.zenodo')
    zenoconfig = configparser.ConfigParser()
    zenoconfig.read(tokenfile)

    section = 'sandbox.zenodo.org' if sandbox else 'zenodo.org'
    try:
        token = zenoconfig[section]['lightform']
    except KeyError:
        token = input('Provide Zenodo token: ')

        # Save the token to the ~/.zenodo
        config = configparser.ConfigParser()
        config[section] = {'lightform': token}

        with open(tokenfile, 'a', encoding="utf-8") as configfile:
            config.write(configfile)

    datarepo = DataRepo(token=token, sandbox=sandbox)
    datarepo.get_deposition_id()
    datarepo.upload_files(files, path=directory)
    datarepo.set_metadata(metadata)
    datarepo.upload_metadata()
    if arguments['--publish']:
        datarepo.publish()

    # Remove zip file create but if asked to keep it
    if not arguments['--nozip'] \
            and not arguments['--keep'] \
            and len(files) == 1:
        logger.info('Remove created zip file: {}'.format(files[0]))
        os.remove(files[0])
    logger.info("Finished " + logger.name)
コード例 #18
0
ファイル: zenodo_metadata.py プロジェクト: gruel/datalight
    def _check_license_availability(self,
                                    flicenses=None,
                                    opendefinition=False):
        """Verify that the license is listed as open.

        The license is only checked when the metadata *access_right* is
        ``open`` or ``embargoed``.  In that case the license has to be
        part of the list provided by the
        `Open Definition License Service<https://licenses.opendefinition.org/>`_

        https://licenses.opendefinition.org/licenses/groups/all.json

        The list is read from a file on disk, or downloaded from the
        service when asked or when that file does not exist.  The license
        of the metadata is converted to upper case (the metadata are
        modified) and it is accepted as soon as the name of one of the
        licenses listed starts with it.

        .. important::
            The file provided by the software **could** be out-dated.
            Since the upload of the data on Zenodo will do the verification
            it is not a major problem but the user as to be careful.

        Parameter
        ---------
        flicenses: str
            Name of the json file which contains the licenses.  Default:
            ``schemas/zenodo/opendefinition-licenses.json`` under ``_dir``.
        opendefinition: boolean
            if True the list is downloaded instead of being read from
            *flicenses*.

        Return
        ------
        ``True`` if there is no need to check the license or if it is
        validated, ``None`` if the metadata do not contain any license.

        Exception
        ---------
        ZenodoMetadataException
            raise if license does not exist in the list accepted by Zenodo
            as open.

        TODO: modify method to use file on disk before and if license not there,
        TODO: look at the file on internet and retest it.
        """
        access_right = self._metadata['access_right']

        # Only 'open' and 'embargoed' require an open compliant license
        if access_right not in ['open', 'embargoed']:
            logger.info('No need to check license for Zenodo upload.')
            return True

        if opendefinition:
            # List of the licenses taken from the opendefinition website
            licenses = self._get_opendefinition_file()
        else:
            # List of the licenses taken from the input file or from the
            # default file, the website being the fallback
            if flicenses is None:
                flicenses = os.path.join(_dir, 'schemas', 'zenodo',
                                         'opendefinition-licenses.json')
            try:
                with open(flicenses) as f:
                    licenses = json.load(f)
                    logger.info(
                        'Use file: {} to validate license'.format(flicenses))
            except FileNotFoundError:
                licenses = self._get_opendefinition_file()

        # Nothing to validate without license
        if 'license' not in self._metadata:
            return None

        mlicense = self._metadata['license'].upper()
        self._metadata['license'] = mlicense
        logger.info(
            'License present in metadata file: "{}"'.format(mlicense))
        logger.info('access_right: "{}"'.format(access_right))

        # First license listed whose name starts with the one requested
        validated = next((name for name in licenses.keys()
                          if name.startswith(mlicense)), None)
        if validated is not None:
            logger.info('license: "{}" validated.'.format(validated))
            return True

        message = ('license: "{}" is not listed as '
                   'open by Zenodo'.format(mlicense))
        logger.error(message)
        raise ZenodoMetadataException(message)
コード例 #19
0
def run_monitoring_tool():
    """Monitor the mining rigs and report the unpaid balance, forever.

    Every minute the rigs listed in ``c.RIG_HOSTNAMES`` are compared with
    the rigs returned by the NiceHash client; an email is sent each time a
    rig goes down or comes back.  Every four hours an email reports the
    unpaid balance in BTC and in each reference fiat currency.  Errors
    raised during an iteration are logged and the loop carries on, so the
    function never returns.
    """
    mailer = AlertEmailSender(c.MAIL.GMAIL_USERNAME,
                              c.MAIL.GMAIL_PASSWORD,
                              c.MAIL.NOTIFICATION_EMAIL,
                              c.MAIL.EMAIL_SUBJECT)

    # Online / offline state
    client = NiceHashClient(c.BITCOIN_WALLET_PUBLIC_ID)
    polling_interval_sec = 60  # 1 minute
    monitored_rigs = c.RIG_HOSTNAMES
    previous_statuses = [True] * len(monitored_rigs)  # all up at start
    statuses = list(previous_statuses)

    # Balance reporting state
    balance_report_period_sec = 60 * 60 * 4
    last_balance_report = 0

    while True:
        logger.debug('run_monitoring_tool() - RUNNING')
        try:
            # Online / offline inspection (only the names are used)
            rig_names, _speeds, _up_times, _locations, _algo_ids = \
                client.get_mining_rigs()
            connected = set(rig_names)

            for index, rig in enumerate(monitored_rigs):
                is_up = rig in connected
                if is_up:
                    logger.debug('{} is connected.'.format(rig))
                else:
                    logger.debug('{} is down.'.format(rig))
                statuses[index] = is_up

                # An email is sent on a change of state only
                if is_up and previous_statuses[index] is False:
                    mailer.send_email(
                        email_content='[{}] host successfully connected.'
                        .format(rig))
                elif not is_up and previous_statuses[index] is True:
                    mailer.send_email(
                        email_content='[{}] host is down. Please check.'
                        .format(rig))

            previous_statuses = list(statuses)

            # Balance reporting
            if (time() - last_balance_report) > balance_report_period_sec:
                configured = c.REFERENCE_FIAT_CURRENCY
                currencies = (configured.split(',') if ',' in configured
                              else [configured])

                unpaid_btc = client.get_unpaid_balance_btc()
                fiat_amounts = []
                for currency in currencies:
                    rate = get_btc_usd_rate(currency)
                    if rate is None:
                        # No rate available: a negative amount is reported
                        rate = -1.0
                    fiat_amounts.append(unpaid_btc * rate)

                fiat_summary = ', '.join(
                    '{0:.2f} {1}'.format(amount, currency)
                    for amount, currency in zip(fiat_amounts, currencies))
                report = ('Your unpaid balance is now {0:.8f} BTC '
                          '({1} approx).'.format(unpaid_btc, fiat_summary))
                mailer.send_email(email_content=report)
                last_balance_report = time()

        except Exception as e:
            logger.error(e)
        logger.debug(
            'Going to sleep for {} seconds.'.format(polling_interval_sec))
        sleep(polling_interval_sec)