Example 1
def add_key (alg, key, nm):
    try:
        name, machine = nm.split('@')
    except ValueError:
        return False

    name = name

    d = './' + keydir + '/' + machine
    kf = d + '/' + prepend + name + '.pub'

    if  not mre.match (machine) or not nre.match (name) or not are.match (alg):
        return False

    if os.path.exists(kf):
        print 'ignoring duplicate key for:', kf
        return True # we do this so that we don't leak info

    f = NamedTemporaryFile(delete=False)
    f.file.write ('%s %s %s@%s\n' % (alg, key, name, machine))
    f.close()

    p = Popen(['ssh-vulnkey', f.name], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    p.stdin.close()
    if len(p.stderr.read()) > 1:
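        # f.unlink here resolves to os.unlink (the Python 2 NamedTemporaryFile wrapper exposes it); os.unlink(f.name) would be the portable spelling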
        f.unlink (f.name)
        return False

    if not os.path.exists (d):
        os.makedirs (d)

    shutil.move (f.name, kf)
    print "Imported", kf
    return True
Example 2
    def test_dump(self):

        cbd1 = CitiBikeData(source_url=self.test_data_url)
        self.assert_data_loaded(cbd1)

        js = StringIO()
        cbd1.dump(js)
        self.assert_data_loaded(cbd1)
        self.assertGreater(len(js.getvalue()), 0)
        js.reset()

        cbd2 = CitiBikeData(load_on_init=False)
        self.assert_data_not_loaded(cbd2)
        cbd2.load(js)
        self.assert_data_loaded(cbd2)
        self.assertDictEqual(cbd1.json, cbd2.json)

        ntf = NamedTemporaryFile(delete=False)
        cbd1.dump(ntf)
        self.assert_data_loaded(cbd1)
        self.assertGreater(len(js.getvalue()), 0)
        ntf.close()

        cbd3 = CitiBikeData(source_url="file:"+ntf.name)
        self.assert_data_loaded(cbd3)
        self.assertDictEqual(cbd1.json, cbd3.json)
        ntf.unlink(ntf.name)  # delete file
Example 3
def atomic_write(content, target):
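    # Stage the content in a named temp file, flush it, then copy it over the target and remove the temp file (note: a plain copy is not an atomic replace).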
    t = NamedTemporaryFile(dir="/tmp", delete=False)
    t.file.write(content)
    t.file.flush()
    t.close()
    copy(t.name, target)
    t.unlink(t.name)
Example 4
 def export(self, out_f=None, format='mp3'):
     out_f = _fd_or_path_or_tempfile(out_f, 'wb+')
     out_f.seek(0)
     data = NamedTemporaryFile(mode="wb", delete=False)
     
     wave_data = wave.open(data)
     wave_data.setnchannels(self.channels)
     wave_data.setsampwidth(self.sample_width)
     wave_data.setframerate(self.frame_rate)
     wave_data.setnframes(self.frame_count())
     wave_data.writeframesraw(self._data)
     wave_data.close()
     
     
     output = NamedTemporaryFile(mode="w+")
     
     # read stdin / write stdout
     subprocess.call(['ffmpeg', 
                      '-y', # always overwrite existing files
                      "-f", "wav", "-i", data.name, # input options (filename last)
                      "-f", format, output.name, # output options (filename last)
                      ], 
                     
                     # make ffmpeg shut up
                     stderr=open(os.devnull, 'w'))
     
     output.seek(0)
     out_f.write(output.read())
     
     data.unlink(data.name)
     out_f.seek(0)
     return out_f
Example 5
def _save_samba_share(conf):
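    # Write the new share config to a temp file, back up the current smb.conf with a timestamp, then install the temp file and fix its permissions/ownership via root commands.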
    temp = NamedTemporaryFile('w', delete=False)
    
    conf.write(temp)
    temp.close()

    bkp_date = datetime.datetime.now().strftime('%Y-%m-%d_%Hh%Mm%Ss')
                          
                          
    process = run_as_root('cp "/etc/samba/smb.conf" '\
                          '"/etc/samba/smb.conf-{0}.bkp"'
                          .format(bkp_date))
    process.expect(pexpect.EOF)

    process = run_as_root('cp "{0}" "/etc/samba/smb.conf"'
                          .format(temp.name))
    process.expect(pexpect.EOF)
    
    process = run_as_root('chmod 644 /etc/samba/smb.conf')
    process.expect(pexpect.EOF)
    
    process = run_as_root('chown root:root /etc/samba/smb.conf')
    process.expect(pexpect.EOF)

    temp.unlink(temp.name)
Example 6
    def launcher_hadoop_job(self,
                            data_type,
                            input,
                            output,
                            result_companyId,
                            map_tasks=8,
                            red_tasks=8):
        """Runs the Hadoop job uploading task configuration"""
        # create report to save on completion or error
        report = {
            'started_at': datetime.now(),
            'state': 'launched',
            'input': input
        }

        # Create temporary file to upload with json extension to identify it in HDFS
        job_extra_config = self.config.copy()
        job_extra_config.update({'companyId': result_companyId})
        f = NamedTemporaryFile(delete=False, suffix='.json')
        f.write(json.dumps(job_extra_config))
        f.close()
        self.logger.debug(
            'Created temporary config file to upload into hadoop and read from job: %s'
            % f.name)
        # create hadoop job instance adding file location to be uploaded
        if data_type == "billing":
            mr_job = MRJob_clean_billing_data(args=[
                '-r', 'hadoop', 'hdfs://' + input, '--file', f.name, '-c',
                'module_edinet/edinet_clean_daily_data_etl/mrjob.conf',
                '--output-dir', 'hdfs://' + output, '--jobconf',
                'mapred.job.name=edinet_clean_daily_data_etl_billing',
                '--jobconf', 'mapred.reduce.tasks={}'.format(self.num_reducers)
            ])
        elif data_type == "metering":
            mr_job = MRJob_clean_metering_data(args=[
                '-r', 'hadoop', 'hdfs://' + input, '--file', f.name, '-c',
                'module_edinet/edinet_clean_daily_data_etl/mrjob.conf',
                '--output-dir', 'hdfs://' + output, '--jobconf',
                'mapred.job.name=edinet_clean_daily_data_etl_metering',
                '--jobconf', 'mapred.reduce.tasks={}'.format(self.num_reducers)
            ])
        else:
            raise Exception(
                "The job with data type {} can not be treated".format(
                    data_type))
        with mr_job.make_runner() as runner:
            try:
                runner.run()
            except Exception as e:
                f.unlink(f.name)
                raise Exception(
                    'Error running MRJob process using hadoop: {}'.format(e))

        f.unlink(f.name)
        self.logger.debug(
            'Temporary config file uploaded has been deleted from FileSystem')

        report['finished_at'] = datetime.now()
        report['state'] = 'finished'
        return report
Example 7
File: test.py Project: ILJICH/PyBd
 def test_pipehandler(self):
     tmp = NamedTemporaryFile(delete=False)
     tmp.close()
     Handler = HandlerFactory("pipe", {"path":tmp.name})
     h = Handler("test")
     h([])
     with open(tmp.name,"r") as f:
         self.assertEqual(f.read(), "test")
     tmp.unlink(tmp.name)
Example 8
    def launcher_hadoop_job(self,
                            type,
                            input,
                            company=None,
                            devices=None,
                            stations=None,
                            map_tasks=8,
                            red_tasks=8):
        """Runs the Hadoop job uploading task configuration"""
        # create report to save on completion or error
        report = {
            'started_at': datetime.now(),
            'state': 'launched',
            'input': input
        }

        # Create temporary file to upload with json extension to identify it in HDFS
        job_extra_config = self.config.copy()
        job_extra_config.update({
            'devices': devices,
            'company': company,
            'stations': stations,
            'task_id': self.task_UUID
        })
        f = NamedTemporaryFile(delete=False, suffix='.json')
        f.write(json.dumps(job_extra_config))
        f.close()
        report['config_temp_file'] = f.name
        self.logger.debug(
            'Created temporary config file to upload into hadoop and read from job: %s'
            % f.name)
        # create hadoop job instance adding file location to be uploaded
        mr_job = MRJob_align(args=[
            '-r', 'hadoop', 'hdfs://' + input, '--file', f.name, '-c',
            'module_edinet/edinet_baseline_hourly_module/mrjob.conf',
            '--jobconf', 'mapred.job.name=edinet_baseline_hourly_module',
            '--jobconf', 'mapred.reduce.tasks={}'.format(self.num_reducers)
        ])
        # mr_job = MRJob_align(args=['-r', 'hadoop', 'hdfs://'+input, '--file', f.name, '--output-dir', '/tmp/prova_dani', '--python-archive', path.dirname(lib.__file__)])  # debugger
        with mr_job.make_runner() as runner:
            try:
                runner.run()
            except Exception as e:
                f.unlink(f.name)
                raise Exception(
                    'Error running MRJob process using hadoop: {}'.format(e))

        f.unlink(f.name)
        self.logger.debug(
            'Temporary config file uploaded has been deleted from FileSystem')

        report['finished_at'] = datetime.now()
        report['state'] = 'finished'

        return report
Example 9
File: test.py Project: ILJICH/PyBd
    def test_clbhandler(self):
        tmp = NamedTemporaryFile(delete=False)
        tmp.file.write("def raise_(): raise FutureWarning\n")
        tmp.file.write("def raise__(smt): raise smt")
        tmp.close()

        Handler = HandlerFactory("callback", {"path": tmp.name})
        self.assertRaises(FutureWarning, Handler("raise_()"), [])
        self.assertRaises(FutureWarning, Handler("raise__({0})"), [FutureWarning])

        tmp.unlink(tmp.name)
Example 10
    def export(self, out_f=None, format='mp3', codec=None):
        out_f = _fd_or_path_or_tempfile(out_f, 'wb+')
        out_f.seek(0)
        # for wav output we can just write the data directly to out_f
        if format == "wav":
            data = out_f
        else:
            data = NamedTemporaryFile(mode="wb", delete=False)

        wave_data = wave.open(data, 'wb')
        wave_data.setnchannels(self.channels)
        wave_data.setsampwidth(self.sample_width)
        wave_data.setframerate(self.frame_rate)
        wave_data.setnframes(self.frame_count())
        wave_data.writeframesraw(self._data)
        wave_data.close()

        # for wav files, we're done (wav data is written directly to out_f)
        if format == 'wav':
            return out_f

        output = NamedTemporaryFile(mode="w+")

        # build call args
        args = [
            self.ffmpeg,
            '-y',  # always overwrite existing files
            "-f",
            "wav",
            "-i",
            data.name,  # input options (filename last)
        ]
        if codec is not None:
            # force audio encoder
            args.extend(["-acodec", codec])
        args.extend([
            "-f",
            format,
            output.name,  # output options (filename last)
        ])
        # read stdin / write stdout
        subprocess.call(
            args,
            # make ffmpeg shut up
            stderr=open(os.devnull, 'w'))

        output.seek(0)
        out_f.write(output.read())

        data.unlink(data.name)
        out_f.seek(0)
        return out_f
Example 11
    def upload(self, src_name, dst_name, compress=True):
        if not compress:
            return self._upload(src_name, dst_name)

        fout = NamedTemporaryFile(suffix='.gz', mode='wb', delete=False)
        try:
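            # the temp file only reserves a .gz path on disk; GzipFile reopens it by name below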
            fout.close()
            logging.debug('Compressing file %s...', src_name)
            with \
                    open(src_name, 'rb') as fin, \
                    closing(gzip.GzipFile(fout.name, mode='wb')) as gzout:
                for chunk in iterchunks(fin):
                    gzout.write(chunk)
            return self._upload(fout.name, dst_name + '.gz')
        finally:
            fout.unlink(fout.name)
Example 12
    def upload(self, src_name, dst_name, compress=True):
        if not compress:
            return self._upload(src_name, dst_name)

        fout = NamedTemporaryFile(suffix='.gz', mode='wb', delete=False)
        try:
            fout.close()
            logging.debug('Compressing file %s...', src_name)
            with \
                    open(src_name, 'rb') as fin, \
                    closing(gzip.GzipFile(fout.name, mode='wb')) as gzout:
                for chunk in iterchunks(fin):
                    gzout.write(chunk)
            return self._upload(fout.name, dst_name + '.gz')
        finally:
            fout.unlink(fout.name)
Example 13
    def export(self, out_f=None, format='mp3', codec=None):
        out_f = _fd_or_path_or_tempfile(out_f, 'wb+')
        out_f.seek(0)
        # for wav output we can just write the data directly to out_f
        if format == "wav":
            data = out_f
        else:
            data = NamedTemporaryFile(mode="wb", delete=False)

        wave_data = wave.open(data, 'wb')
        wave_data.setnchannels(self.channels)
        wave_data.setsampwidth(self.sample_width)
        wave_data.setframerate(self.frame_rate)
        wave_data.setnframes(self.frame_count())
        wave_data.writeframesraw(self._data)
        wave_data.close()

        # for wav files, we're done (wav data is written directly to out_f)
        if format == 'wav':
            return out_f

        output = NamedTemporaryFile(mode="w+")

        # build call args
        args =[self.ffmpeg,
            '-y',  # always overwrite existing files
            "-f", "wav", "-i", data.name,  # input options (filename last)
        ]
        if codec is not None:
            # force audio encoder
            args.extend(["-acodec", codec])
        args.extend([
            "-f", format, output.name,  # output options (filename last)
        ])
        # read stdin / write stdout
        subprocess.call(args,
            # make ffmpeg shut up
            stderr=open(os.devnull, 'w')
        )

        output.seek(0)
        out_f.write(output.read())

        data.unlink(data.name)
        out_f.seek(0)
        return out_f
Example 14
    def hadoop_job(self, input, companyId):
        """Runs the Hadoop job uploading task configuration"""
        # create report to save on completion or error
        report = {
            'started_at': datetime.now(),
            'state': 'launched',
            'input': input
        }

        # Create temporary file to upload with json extension to identify it in HDFS
        f = NamedTemporaryFile(delete=False, suffix='.json')
        f.write(json.dumps(self.config))
        f.close()
        self.logger.debug(
            'Created temporary config file to upload into hadoop and read from job: {}'
            .format(f.name))

        # create hadoop job instance adding file location to be uploaded
        dtnow = datetime.now()
        str_dtnow = dtnow.strftime("%Y%m%d%H%M")

        mr_job = Hadoop_ETL(args=[
            '-r', 'hadoop', input, '--file', f.name, '--output-dir', "{}/{}/{}"
            .format(self.config['error_measures'], str(companyId), str_dtnow),
            '-c', 'module_edinet/edinet_billing_measures_etl/mrjob.conf',
            '--jobconf', 'mapred.job.name=edinet_billing_measures_etl'
        ])
        # mr_job = Hadoop_ETL(args=['-r', 'hadoop', input, '--file', f.name, '--python-archive', path.dirname(lib.__file__)])
        with mr_job.make_runner() as runner:
            try:
                runner.run()
            except Exception as e:
                f.unlink(f.name)
                raise Exception(
                    'Error running MRJob ETL process using hadoop: {}'.format(
                        e))

        f.unlink(f.name)
        self.logger.debug(
            'Temporary config file uploaded has been deleted from FileSystem')

        report['finished_at'] = datetime.now()
        report['state'] = 'finished'

        return report
Example 15
def validate_sparse_support(directory):
    SIZE = 1024 * 1024  # 1 MiB temp file seems reasonable

    if not directory.exists():
        raise FileNotFoundError(f"No such file or directory: '{directory}'")
    elif not directory.is_dir():
        raise ValueError(f"Expected '{directory}' to be a directory")

    # Try to write a small sparse file in the same directory as `filepath` and then read
    # the actual size to check that it's sparse
    temp_file = NamedTemporaryFile(dir=directory, delete=False)
    temp_file.close()
    temp_file = Path(temp_file.name)

    write_sparse_file(temp_file, b"~Testing~", SIZE)
    temp_file_size = size_on_disk(temp_file)
    if temp_file_size is None:
        return False

    is_sparse = temp_file_size < SIZE
    temp_file.unlink()

    return is_sparse
Example 16
 def upload(self, src_name, dst_name, compress=True, use_gzip=False):
     if compress:
         fout = NamedTemporaryFile(suffix='.gz', mode='wb', delete=False)
         try:
             if use_gzip:
                 logging.debug('Compressing file %s with gzip...', src_name)
                 p = subprocess.Popen(["gzip", '-c', src_name], stdout=fout)
                 assert p.wait() == 0, 'Gzip compression failed'
                 fout.close()
                 return self._upload(fout.name, dst_name + '.gz')
             else:
                 fout.close()
                 logging.debug('Compressing file %s...', src_name)
                 with \
                         open(src_name, 'rb') as fin, \
                         closing(gzip.GzipFile(fout.name, mode='wb')) as gzout:
                     for chunk in iterchunks(fin):
                         gzout.write(chunk)
                 return self._upload(fout.name, dst_name + '.gz')
         finally:
             fout.unlink(fout.name)
     else:
         self._upload(src_name, dst_name)
Example 17
def _save_samba_share(conf):
    temp = NamedTemporaryFile('w', delete=False)

    conf.write(temp)
    temp.close()

    bkp_date = datetime.datetime.now().strftime('%Y-%m-%d_%Hh%Mm%Ss')


    process = run_as_root('cp "/etc/samba/smb.conf" '\
                          '"/etc/samba/smb.conf-{0}.bkp"'
                          .format(bkp_date))
    process.expect(pexpect.EOF)

    process = run_as_root('cp "{0}" "/etc/samba/smb.conf"'.format(temp.name))
    process.expect(pexpect.EOF)

    process = run_as_root('chmod 644 /etc/samba/smb.conf')
    process.expect(pexpect.EOF)

    process = run_as_root('chown root:root /etc/samba/smb.conf')
    process.expect(pexpect.EOF)

    temp.unlink(temp.name)
Example 18
    def handle(self, *args, **options):
        if not (GOOGLE_DOCS_ACCOUNT and GOOGLE_DOCS_PASSWORD and GOOGLE_DOCS_RESOURCE_ID):
            raise CommandError('You must set GOOGLE_DOCS_ACCOUNT, GOOGLE_DOCS_PASSWORD and GOOGLE_DOCS_RESOURCE_ID in your settings file.')
        verbosity = int(options.get('verbosity', 1))
        output_all = options.get('output_all')
        dry_run = options.get('dry_run')
        fields = ('email', 'first_name', 'last_name', 'phone', 'city', 'state', 'zipcode', 'is_a', 'broadcasters', 'date_created', 'share_info')

        profile_list = NonUserProfile.objects.order_by('-date_created')
        if not output_all:
            profile_list = profile_list.filter(share_info=True)

        if len(profile_list):
            if verbosity > 1:
                self.stdout.write('{0} signups to record.'.format(len(profile_list)))
            fp = NamedTemporaryFile(delete=False)
            writer = csv.DictWriter(fp, fields)
            writer.writeheader()

            for signup in profile_list:
                output = {
                    'email': signup.email,
                    'first_name': signup.first_name,
                    'last_name': signup.last_name,
                    'phone': signup.phone,
                    'city': signup.city,
                    'state': signup.state,
                    'zipcode': signup.zipcode,
                    'is_a': signup.is_a,
                    'date_created': signup.date_created.strftime('%m/%d/%Y %H:%M:%S'),
                    'share_info': signup.share_info
                }
                extra_fields_data = signup.extra_fields
                for extra_field in SIGNUP_EXTRA_FIELDS:
                    input_val = None
                    if isinstance(extra_fields_data[extra_field], list):
                        input_val = ', '.join(extra_fields_data[extra_field])
                    else:
                        input_val = extra_fields_data[extra_field]
                    output[extra_field] = input_val
                writer.writerow(output)
            if dry_run:
                self.stdout.write('Row created:\n{0}\n'.format('|'.join([str(output[f]) for f in fields])))
            del(writer)

            if not dry_run:
                client = gdata.docs.client.DocsClient()
                login_token = client.ClientLogin(GOOGLE_DOCS_ACCOUNT, GOOGLE_DOCS_PASSWORD, 'politicaladsleuth')
                fp.close()
                media = gdata.data.MediaSource(file_path=fp.name, content_type='text/csv')
                try:
                    resource = client.get_resource_by_id(GOOGLE_DOCS_RESOURCE_ID)
                    updated_resource = client.update_resource(resource, media=media, update_metadata=False, new_revision=True)
                    self.stdout.write('Data uploaded to "{0}"\n'.format(updated_resource.title.text))
                except gdata.client.RequestError as e:
                    self.stderr.write(e.message + '\n')
                    self.stdout.write('****Upload may have succeeded despite an InvalidEntryException error****\n')

            fp.close()
            fp.unlink(fp.name)
        else:
            self.stdout.write('No signups for the given parameters\n')
Example 19
class FacadeTest(TestCase):
    def setUp(self):
        cfg = ConfigParser()
        cfg.add_section('some_section')
        cfg.set('some_section', 'foo', 'bar')
        cfg.set('some_section', 'host', 'foo')
        cfg.set('some_section', 'port', '29192')
        self.config_file = NamedTemporaryFile(mode='w', delete=False)
        cfg.write(self.config_file)
        self.config_file.close()

        self.env_file = NamedTemporaryFile(mode='w', delete=False)
        cfg = ConfigParser()
        cfg.add_section('some_section')
        cfg.set('some_section', 'host', 'SOME_HOST_NAME')
        cfg.set('some_section', 'port', 'SOME_PORT_NAME')

        cfg.add_section('other_section')
        cfg.set('other_section', 'foo', 'bar')
        cfg.write(self.env_file)
        self.env_file.close()

    def tearDown(self):
        self.config_file.unlink(self.config_file.name)
        self.env_file.unlink(self.env_file.name)

    def test_not_existing_config(self):
        configuration = get_configuration('non_existing_section')
        self.assertIsNone(configuration)

    def test_not_existing_variables(self):
        configuration = get_configuration('messaging',
                                          config_file='non-existing')
        self.assertIsNone(configuration)

    def test_existing_config(self):
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables=self.env_file.name)
        self.assertTrue('host' in configuration)
        self.assertTrue('port' in configuration)
        self.assertEquals(configuration['host'], 'foo')
        self.assertEquals(configuration['port'], '29192')
        self.assertFalse('foo' in configuration)

    def test_existing_env(self):
        os.environ['SOME_HOST_NAME'] = 'bar'
        os.environ['SOME_PORT_NAME'] = '6661'
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables=self.env_file.name)
        self.assertTrue('host' in configuration)
        self.assertTrue('port' in configuration)
        self.assertEquals(configuration['host'], 'bar')
        self.assertEquals(configuration['port'], '6661')
        self.assertFalse('foo' in configuration)
        os.environ.pop('SOME_HOST_NAME')
        os.environ.pop('SOME_PORT_NAME')

    def test_not_in_config(self):
        configuration = get_configuration(section_name='other_section',
                                          config_file=self.config_file.name,
                                          variables=self.env_file.name)
        self.assertIsNone(configuration)
Example 20
    def store(self, file=None, content=None, ctype=None, **kwd):
        """save a file-like item"""
        if content is None and not hasattr(file, 'read'):
            raise TypeError('invalid file-like object')

        data = content if content is not None else file.read()
        size = len(data)
        ext = guessImageType(data[:32])
        if ext is None:
            raise ValueError('invalid image file')

        hashes = [md5(data).hexdigest()]
        _exists_id = self.exists(hashed=hashes[0])
        if _exists_id:
            id = _exists_id
            filename = _make_filename(id, ext)
            print('id {} or hash {} exists!!'.format(id, hashes[0]))
            #raise DuplicateError('already exists')
            return [True, id, filename]
        ids = [_make_id(hashes[0])]
        if 'id' in kwd and kwd['id'] and kwd['id'] not in ids:
            ids += [kwd['id']]

        from image import SimpImage, MIN_QUALITY

        max_file_size = int(self.get_config('max_file_size'))
        max_jpeg_quality = int(self.get_config('max_jpeg_quality'))
        max_width = int(self.get_config('max_width'))
        max_height = int(self.get_config('max_height'))

        if size > max_file_size: max_jpeg_quality -= 1
        if max_jpeg_quality < MIN_QUALITY: max_jpeg_quality = MIN_QUALITY

        im = SimpImage(blob=data)
        meta = im.meta
        if meta['width'] > max_width or meta['height'] > max_height:
            if self.get_config('auto_scale') and im.thumbnail(
                    max_width, max_height):
                if im.format == 'JPEG' and im.quality > max_jpeg_quality:
                    im.quality = max_jpeg_quality
                data = im.get_blob()
                size = len(data)
                print im.meta
                print 'new scaled size {}'.format(size)
                hashes += [md5(data).hexdigest()]
            else:
                raise ValueError(
                    'file: {} dimension {}x{} is too big, max is {}x{}'.format(
                        kwd['name'] if 'name' in kwd else '', meta['width'],
                        meta['height'], max_width, max_height))

        if im.format == 'JPEG':
            if im.quality > max_jpeg_quality:
                print 'quality {} is too high, hash {}'.format(
                    im.quality, hashes[0])
                from tempfile import NamedTemporaryFile
                _tmp = NamedTemporaryFile('w+b',
                                          dir=self.get_config('temp_root'),
                                          delete=False)
                _tmp.file.close()
                save_file(_tmp.name, blob=data)
                if jpegoptim(_tmp.name):
                    fp = open(_tmp.name)
                    data = fp.read()
                    size = len(data)

                    # print 'new optimized size {}'.format(size)
                    fp.close()
                    _tmp.unlink(_tmp.name)
                    del im
                    im = SimpImage(blob=data)
                    meta = im.meta
                    hashes += [md5(data).hexdigest()]
                else:
                    raise EnvironmentError(
                        'jpeg qualty is too high, or need jpegoptim')
        elif im.format == 'PNG' and self.get_config('force_jpeg'):
            im.format = 'JPEG'
            im.quality = max_jpeg_quality
            data = im.get_blob()
            size = len(data)
            hashes += [md5(data).hexdigest()]
            ext = 'jpg'
            meta = im.meta
        del im

        if (size > max_file_size):
            raise ValueError('file: {} size {} is too big, max is {}'.format(
                kwd['name'] if 'name' in kwd else '', size, max_file_size))

        hashed = hashes[len(hashes) - 1]  #md5(data).hexdigest()
        # print ('md5 hash: {}'.format(hashed))

        # TODO: add for support (md5 + size) id
        id = _make_id(hashed)

        # print ('new filename: %r' % filename)

        # TODO: fix for support s3 front browse
        _exists_id = self.exists(id) or self.exists(hashed=hashed)
        if _exists_id:
            id = _exists_id
            filename = _make_filename(id, ext)
            print('id {} or hash {} exists!!'.format(id, hashed))
            #raise DuplicateError('already exists')
            return [True, id, filename]
        filename = _make_filename(id, ext)
        # print ('id: {}'.format(id))

        # if ctype is None or ctype == '':
        from _util import guess_mimetype
        ctype = guess_mimetype(filename)

        # save to mongodb
        spec = {
            '_id': id,
            'filename': filename,
            'hash': hashes,
            'mime': ctype,
            'size': size,
            'meta': meta,
            'ids': ids
        }

        if 'name' in kwd and isinstance(kwd['name'], (str, unicode)):
            spec['name'] = kwd['name']

        for k in ['created', 'app_id']:
            if k in kwd and kwd[k]:
                spec[k] = kwd[k]

        if self._store_exists(id, filename=filename):
            self._save_meta(id, spec)
            return [True, id, filename]

        rr = self._put(data, **spec)
        if rr:
            return [True, rr, filename]
Example 21
class ConfigManagerTest(unittest.TestCase):
    def setUp(self):
        cfg = ConfigParser.ConfigParser()
        cfg.add_section('some_section')
        cfg.set('some_section', 'host', 'foo')
        cfg.set('some_section', 'port', '29192')
        cfg.set('some_section', 'backend', 'tests.test_config_manager.Backend')
        self.config_file = NamedTemporaryFile(mode='w', delete=False)
        cfg.write(self.config_file)
        self.config_file.close()

        self.env_file = NamedTemporaryFile(mode='w', delete=False)
        cfg = ConfigParser.ConfigParser()
        cfg.add_section('some_section')
        cfg.set('some_section', 'host', 'SOME_HOST_NAME')
        cfg.set('some_section', 'port', 'SOME_PORT_NAME')

        cfg.add_section('other_section')
        cfg.set('other_section', 'foo', 'bar')
        cfg.write(self.env_file)
        self.env_file.close()

    def tearDown(self):
        self.config_file.unlink(self.config_file.name)
        self.env_file.unlink(self.env_file.name)

    def test_not_existing_section(self):
        configuration = get_configuration('not-existing-section',
                                          config_file=self.config_file.name)
        self.assertIsNone(configuration)

    def test_existing_config(self):
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables_file=self.env_file.name)
        self.assertTrue('host' in configuration)
        self.assertTrue('port' in configuration)
        self.assertEquals(configuration['host'], 'foo')
        self.assertEquals(configuration['port'], '29192')
        self.assertFalse('foo' in configuration)

    def test_existing_env(self):
        os.environ['SOME_HOST_NAME'] = 'bar'
        os.environ['SOME_PORT_NAME'] = '6661'
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables_file=self.env_file.name)
        self.assertTrue('host' in configuration)
        self.assertTrue('port' in configuration)
        self.assertEquals(configuration['host'], 'bar')
        self.assertEquals(configuration['port'], '6661')
        self.assertFalse('foo' in configuration)
        os.environ.pop('SOME_HOST_NAME')
        os.environ.pop('SOME_PORT_NAME')

    def test_get_backend_class(self):
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables_file=self.env_file.name)
        backend_class = get_backend_class(configuration)
        self.assertTrue('Backend' in str(backend_class))
Example 22
    def module_task(self, params):
        self.logger.info('Starting Module for edinet baseline...')
        """CHECK INCONSISTENCIES IN params"""
        try:
            result_companyId = params['result_companyId']
            ts_to = params['ts_to']
            ts_from = params[
                'ts_from'] if 'ts_from' in params else date_n_month(
                    ts_to, -24)
            energyTypeList = params['type'] if 'type' in params else []
        except KeyError as e:
            raise Exception(
                'Not enough parameters provided to module: {}'.format(e))

        ######################################################################################################################################################################################
        """ GET DATA FROM MONGO TO MAKE QUERYS """
        ######################################################################################################################################################################################
        if not energyTypeList:
            energyTypeList = list(
                set([
                    x['type']
                    for x in self.mongo['readings'].find({}, {'type': 1})
                ]))

        #####################################################################################################################################################################################
        """  LOAD DATA FROM HIVE  """
        ######################################################################################################################################################################################

        self.logger.info('Extracting data from mongodb')

        # setting variables for readability
        collection = self.config['mongodb']['modelling_units_collection']

        self.logger.debug('Querying for modelling units in MongoDB')
        cursor = self.mongo[collection].find({})

        device_key = {}
        stations = {}
        for item in cursor:
            if len(item['devices']) > 0:  # to avoid empty list of devices
                for dev in item['devices']:
                    stations[str(dev['deviceId'].encode('utf-8'))] = str(
                        item['stationId']) if 'stationId' in item else None
                    if str(dev['deviceId'].encode(
                            'utf-8')) in device_key.keys():
                        device_key[str(
                            dev['deviceId'].encode('utf-8'))].append(
                                str(item['modellingUnitId']) + '~' +
                                str(item['devices']))
                    else:
                        device_key[str(dev['deviceId'].encode('utf-8'))] = [
                            str(item['modellingUnitId']) + '~' +
                            str(item['devices'])
                        ]

        self.logger.info('A mongo query process has loaded {} devices'.format(
            len(device_key.keys())))

        ######################################################################################################################################################################################
        """ HIVE QUERY TO PREPARE DATA FOR MRJOB """
        ######################################################################################################################################################################################
        # create a table to link devices with stations
        device_stations_df = pd.DataFrame(data={
            "deviceId": stations.keys(),
            "stationId": stations.values()
        },
                                          columns=["deviceId", "stationId"])
        f = NamedTemporaryFile(delete=False, suffix='.csv')
        device_stations_df.to_csv(f.name, header=None, index=None)
        f.close()
        call([
            "hadoop", "fs", "-mkdir", "-p", f.name,
            self.config['paths']['stations']
        ])
        call([
            "hadoop", "fs", "-copyFromLocal", f.name,
            self.config['paths']['stations']
        ])
        f.unlink(f.name)
        device_stations = create_hive_module_input_table(
            self.hive,
            'edinet_device_stations_table',
            self.config['paths']['stations'], [('deviceId', 'string'),
                                               ('stationId', 'string')],
            self.task_UUID,
            sep=",")
        self.context.add_clean_hive_tables(device_stations)

        # create a table with the devices values

        fields = [('deviceId', 'string'), ('ts', 'int'), ('value', 'float'),
                  ('energyType', 'string'), ('source', 'string'),
                  ('temperature', 'string')]

        location = self.config['paths']['measures']

        input_table = create_hive_module_input_table(self.hive,
                                                     'edinet_baseline_input',
                                                     location, fields,
                                                     self.task_UUID)

        #add input table to be deleted after execution
        self.context.add_clean_hive_tables(input_table)
        qbr = RawQueryBuilder(self.hive)
        sentence = """
            INSERT OVERWRITE TABLE {input_table}
            SELECT a.deviceId, a.ts, a.value, a.energyType, a.source, c.temperature FROM
                (SELECT ai.deviceid as deviceId, ai.ts as ts, ai.value as value, ai.energyType as energyType, ai.source as source FROM edinet_hourly_consumption ai
                    WHERE
                        ai.ts >= UNIX_TIMESTAMP("{ts_from}","yyyy-MM-dd HH:mm:ss") AND
                        ai.ts <= UNIX_TIMESTAMP("{ts_to}","yyyy-MM-dd HH:mm:ss")) a
                JOIN {device_stations} b on a.deviceId==b.deviceId
                JOIN  edinet_meteo c on b.stationId==c.stationId and SUBSTR(FROM_UNIXTIME(a.ts), 1, 13) == SUBSTR(FROM_UNIXTIME(c.ts), 1, 13)
                """.format(input_table=input_table,
                           ts_from=ts_from,
                           ts_to=ts_to,
                           device_stations=device_stations)

        self.logger.debug(sentence)
        qbr.execute_query(sentence)

        ######################################################################################################################################################################################
        """ SETUP MAP REDUCE JOB """
        ######################################################################################################################################################################################

        self.logger.info('Getting')
        try:
            # Launch MapReduce job
            ## Buffered measures to HBase
            self.logger.debug('MRJob Align')
            self.launcher_hadoop_job('align', location, result_companyId,
                                     device_key, stations)
        except Exception as e:
            raise Exception('MRJob ALIGN process job has failed: {}'.format(e))
        self.logger.info('Module EDINET_baseline execution finished...')
Example 23
class ConfigManagerTest(unittest.TestCase):
    def setUp(self):
        cfg = ConfigParser.ConfigParser()
        cfg.add_section('some_section')
        cfg.set('some_section', 'host', 'foo')
        cfg.set('some_section', 'port', '29192')
        cfg.set('some_section', 'backend', 'tests.test_config_manager.Backend')
        self.config_file = NamedTemporaryFile(mode='w', delete=False)
        cfg.write(self.config_file)
        self.config_file.close()

        self.env_file = NamedTemporaryFile(mode='w', delete=False)
        cfg = ConfigParser.ConfigParser()
        cfg.add_section('some_section')
        cfg.set('some_section', 'host', 'SOME_HOST_NAME')
        cfg.set('some_section', 'port', 'SOME_PORT_NAME')

        cfg.add_section('other_section')
        cfg.set('other_section', 'foo', 'bar')
        cfg.write(self.env_file)
        self.env_file.close()

    def tearDown(self):
        self.config_file.unlink(self.config_file.name)
        self.env_file.unlink(self.env_file.name)

    def test_not_existing_section(self):
        configuration = get_configuration('not-existing-section',
                                          config_file=self.config_file.name)
        self.assertIsNone(configuration)

    def test_existing_config(self):
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables_file=self.env_file.name)
        self.assertTrue('host' in configuration)
        self.assertTrue('port' in configuration)
        self.assertEquals(configuration['host'], 'foo')
        self.assertEquals(configuration['port'], '29192')
        self.assertFalse('foo' in configuration)

    def test_existing_env(self):
        os.environ['SOME_HOST_NAME'] = 'bar'
        os.environ['SOME_PORT_NAME'] = '6661'
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables_file=self.env_file.name)
        self.assertTrue('host' in configuration)
        self.assertTrue('port' in configuration)
        self.assertEquals(configuration['host'], 'bar')
        self.assertEquals(configuration['port'], '6661')
        self.assertFalse('foo' in configuration)
        os.environ.pop('SOME_HOST_NAME')
        os.environ.pop('SOME_PORT_NAME')

    def test_get_backend_class(self):
        configuration = get_configuration(section_name='some_section',
                                          config_file=self.config_file.name,
                                          variables_file=self.env_file.name)
        backend_class = get_backend_class(configuration)
        self.assertTrue('Backend' in str(backend_class))
Example 24
	def store(self, file=None, content=None, ctype=None, **kwd):
		"""save a file-like item"""
		if content is None and not hasattr(file, 'read'):
			raise TypeError('invalid file-like object')

		data = content if content is not None else file.read()
		size = len(data)
		ext = guessImageType(data[:32])
		if ext is None:
			raise ValueError('invalid image file')

		hashes = [md5(data).hexdigest()]
		_exists_id = self.exists(hashed=hashes[0])
		if _exists_id:
			id = _exists_id
			filename = _make_filename(id, ext)
			print ('id {} or hash {} exists!!'.format(id, hashes[0]))
			#raise DuplicateError('already exists')
			return [True, id, filename]
		ids = [_make_id(hashes[0])]
		if 'id' in kwd and kwd['id'] and kwd['id'] not in ids:
			ids += [kwd['id']]

		from image import SimpImage, MIN_QUALITY

		max_file_size = int(self.get_config('max_file_size'))
		max_jpeg_quality = int(self.get_config('max_jpeg_quality'))
		max_width = int(self.get_config('max_width'))
		max_height = int(self.get_config('max_height'))

		if size > max_file_size: max_jpeg_quality -= 1
		if max_jpeg_quality < MIN_QUALITY: max_jpeg_quality = MIN_QUALITY

		im = SimpImage(blob=data)
		meta = im.meta
		if meta['width'] > max_width or meta['height'] > max_height:
			if self.get_config('auto_scale') and im.thumbnail(max_width, max_height):
				if im.format == 'JPEG' and im.quality > max_jpeg_quality:
					im.quality = max_jpeg_quality
				data = im.get_blob()
				size = len(data)
				print im.meta
				print 'new scaled size {}'.format(size)
				hashes += [md5(data).hexdigest()]
			else:
				raise ValueError('file: {} dimension {}x{} is too big, max is {}x{}'.format(kwd['name'] if 'name' in kwd else '', meta['width'], meta['height'], max_width, max_height))

		if im.format == 'JPEG':
			if im.quality > max_jpeg_quality:
				print 'quality {} is too high, hash {}'.format(im.quality, hashes[0])
				from tempfile import NamedTemporaryFile
				_tmp = NamedTemporaryFile('w+b',dir=self.get_config('temp_root'),delete=False)
				_tmp.file.close()
				save_file(_tmp.name, blob=data)
				if jpegoptim(_tmp.name):
					fp = open(_tmp.name)
					data = fp.read()
					size = len(data)

					# print 'new optimized size {}'.format(size)
					fp.close()
					_tmp.unlink(_tmp.name)
					del im
					im = SimpImage(blob=data)
					meta = im.meta
					hashes += [md5(data).hexdigest()]
				else:
					raise EnvironmentError('jpeg qualty is too high, or need jpegoptim')
		elif im.format == 'PNG' and self.get_config('force_jpeg'):
			im.format = 'JPEG'
			im.quality = max_jpeg_quality
			data = im.get_blob()
			size = len(data)
			hashes += [md5(data).hexdigest()]
			ext = 'jpg'
			meta = im.meta
		del im

		if (size > max_file_size):
			raise ValueError('file: {} size {} is too big, max is {}'.format(kwd['name'] if 'name' in kwd else '', size, max_file_size))

		hashed = hashes[len(hashes)-1] #md5(data).hexdigest()
		# print ('md5 hash: {}'.format(hashed))

		# TODO: add for support (md5 + size) id
		id = _make_id(hashed)

		# print ('new filename: %r' % filename)

		# TODO: fix for support s3 front browse
		_exists_id = self.exists(id) or self.exists(hashed=hashed)
		if _exists_id:
			id = _exists_id
			filename = _make_filename(id, ext)
			print ('id {} or hash {} exists!!'.format(id, hashed))
			#raise DuplicateError('already exists')
			return [True, id, filename]
		filename = _make_filename(id, ext)
		# print ('id: {}'.format(id))

		# if ctype is None or ctype == '':
		from _util import guess_mimetype
		ctype = guess_mimetype(filename)

		# save to mongodb
		spec = {'_id': id,'filename': filename, 'hash': hashes, 'mime': ctype, 'size': size, 'meta': meta, 'ids': ids}

		if 'name' in kwd and isinstance(kwd['name'], (str, unicode)):
			spec['name'] = kwd['name']

		for k in ['created', 'app_id']:
			if k in kwd and kwd[k]:
				spec[k] = kwd[k]

		if self._store_exists(id, filename=filename):
			self._save_meta(id, spec)
			return [True, id, filename]

		rr = self._put(data, **spec)
		if rr:
			return [True, rr, filename]
Example 25
    def _merge(self, cc_dir):
        md5_hashes = {}
        if HASH_FILE.exists():
            for line in [l.strip() for l in HASH_FILE.read_text().split("\n")]:
                if line:
                    values = line.rsplit(":", maxsplit=1)
                    if len(values) == 2 and values[0] and values[1]:
                        md5_hashes[values[0]] = values[1]

        try:
            p = subprocess.run(
                "git -C \"{cc_dir}\" status --porcelain -uall".format(
                    **locals()),
                shell=True,
                stdout=subprocess.PIPE,
                check=True)
            merge_files = []
            status = str(p.stdout, "utf-8").strip()
            for line in [s.strip() for s in status.split("\n")]:
                if line:
                    merge_files.append(tuple(line.split()))
        except subprocess.CalledProcessError as err:
            raise nicfit.CommandError(str(err))

        for st, file in merge_files + self.args.extra_merge:
            dst = Path(file)
            src = cc_dir / dst

            hasher = md5()
            try:
                hasher.update(src.read_bytes())
            except FileNotFoundError as notfound:
                perr(notfound)
                continue
            md5sum = hasher.hexdigest()
            merge_file = (self.args.ignore_md5s or file not in md5_hashes
                          or md5sum != md5_hashes[file])
            pout("Comparing {} hash({}): {}".format(
                file, md5sum,
                Fg.blue("new") if merge_file else Fg.green("merged")))
            md5_hashes[file] = md5sum

            if merge_file:
                tmp_dst = None
                if not dst.exists():
                    tmp_dst = NamedTemporaryFile("w",
                                                 suffix=dst.suffix,
                                                 delete=False)
                    # Write the file to exist on disk for diff and merge
                    tmp_dst.close()
                    tmp_dst = Path(tmp_dst.name)

                dst_file = str(dst if tmp_dst is None else tmp_dst)
                diffs = subprocess.run("diff '{src}' '{dst_file}' >/dev/null"
                                       .format(**locals()), shell=True)\
                                  .returncode != 0
                pout("Differences: {}".format(diffs))
                if diffs:
                    merge_cmd = self.args.merge_cmd
                    if merge_cmd is None:
                        for cmd, opts in MERGE_TOOLS.items():
                            if shutil.which(cmd):
                                merge_cmd = " ".join([cmd, opts or ""])
                                break
                    if merge_cmd is not None:
                        subprocess.run(
                            "{merge_cmd} '{src}' '{dst_file}'".format(
                                **locals()),
                            shell=True,
                            check=True)
                    else:
                        perr("Merge disabled, no merge command found. Install "
                             "a merge tool such as: {tools}.\nOr use "
                             "--merge-cmd to specify your own.".format(
                                 tools=", ".join(MERGE_TOOLS.keys())))

                if tmp_dst and tmp_dst.stat().st_size == 0:
                    tmp_dst.unlink()
                elif tmp_dst:
                    # Move tmp file into place and create parent dirs
                    if not dst.parent.exists():
                        dst.parent.mkdir(0o755, parents=True)
                    shutil.move(str(tmp_dst), str(dst))

        with HASH_FILE.open("w") as hash_file:
            for f in sorted(md5_hashes.keys()):
                hash_file.write("{}:{}\n".format(f, md5_hashes[f]))
Example 26
    def handle(self, *args, **options):
        if not (GOOGLE_DOCS_ACCOUNT and GOOGLE_DOCS_PASSWORD and GOOGLE_DOCS_RESOURCE_ID):
            raise CommandError(
                'You must set GOOGLE_DOCS_ACCOUNT, GOOGLE_DOCS_PASSWORD and GOOGLE_DOCS_RESOURCE_ID in your settings file.'
            )
        verbosity = int(options.get('verbosity', 1))
        output_all = options.get('output_all')
        dry_run = options.get('dry_run')
        fields = ('email', 'first_name', 'last_name', 'phone', 'city', 'state',
                  'zipcode', 'is_a', 'broadcasters', 'date_created',
                  'share_info')

        profile_list = NonUserProfile.objects.order_by('-date_created')
        if not output_all:
            profile_list = profile_list.filter(share_info=True)

        if len(profile_list):
            if verbosity > 1:
                self.stdout.write('{0} signups to record.'.format(
                    len(profile_list)))
            fp = NamedTemporaryFile(delete=False)
            writer = csv.DictWriter(fp, fields)
            writer.writeheader()

            for signup in profile_list:
                output = {
                    'email': signup.email,
                    'first_name': signup.first_name,
                    'last_name': signup.last_name,
                    'phone': signup.phone,
                    'city': signup.city,
                    'state': signup.state,
                    'zipcode': signup.zipcode,
                    'is_a': signup.is_a,
                    'date_created': signup.date_created.strftime('%m/%d/%Y %H:%M:%S'),
                    'share_info': signup.share_info
                }
                extra_fields_data = signup.extra_fields
                for extra_field in SIGNUP_EXTRA_FIELDS:
                    input_val = None
                    if isinstance(extra_fields_data[extra_field], list):
                        input_val = ', '.join(extra_fields_data[extra_field])
                    else:
                        input_val = extra_fields_data[extra_field]
                    output[extra_field] = input_val
                writer.writerow(output)
            if dry_run:
                self.stdout.write('Row created:\n{0}\n'.format('|'.join(
                    [str(output[f]) for f in fields])))
            del (writer)

            if not dry_run:
                client = gdata.docs.client.DocsClient()
                login_token = client.ClientLogin(GOOGLE_DOCS_ACCOUNT,
                                                 GOOGLE_DOCS_PASSWORD,
                                                 'politicaladsleuth')
                fp.close()
                media = gdata.data.MediaSource(file_path=fp.name,
                                               content_type='text/csv')
                try:
                    resource = client.get_resource_by_id(
                        GOOGLE_DOCS_RESOURCE_ID)
                    updated_resource = client.update_resource(
                        resource,
                        media=media,
                        update_metadata=False,
                        new_revision=True)
                    self.stdout.write('Data uploaded to "{0}"\n'.format(
                        updated_resource.title.text))
                except gdata.client.RequestError as e:
                    self.stderr.write(e.message + '\n')
                    self.stdout.write(
                        '****Upload may have succeeded despite an InvalidEntryException error****\n'
                    )

            fp.close()
            fp.unlink(fp.name)
        else:
            self.stdout.write('No signups for the given parameters\n')