Python Client.delete Beispiele, snakebite.client.Client.delete Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: hdfs.py Projekt: Pandinosaurus/tuttle

 def remove(self):
     client = Client(self._host,
                     self._port,
                     effective_user=self._user,
                     use_trash=False)
     it = client.delete([self._partial], recurse=True)
     for elmt in it:
         pass

Beispiel #2

0

Datei anzeigen

def delete_item(config, filepath='', localpath=''):

    if(config['BACKEND'] == 'hdfs'):
        client = Client(socket.gethostname(), config['HADOOP_RPC_PORT'], use_trash=False)
        del_gen = client.delete([filepath], recurse=True)
        for del_item in del_gen:
            pass
    elif(config['BACKEND'] == 'swift'):
        pass  # To be implemented

    # Deleting modules or datasets from local directories (will also suffice for nfs backend)
    if(os.path.isdir(localpath)):  # Check if it is a dataset
        shutil.rmtree(localpath)
    else:
        try:
            os.remove(localpath)
        except OSError:
            pass

Beispiel #3

0

Datei anzeigen

Datei: crfminimal.py Projekt: cjsanjay/dig-crf

def crfalign(sc, inputFilename, outputDirectory, 
            limit=LIMIT, location='hdfs', outputFormat="text", partitions=None, deleteFirst=True):

    # crfConfigDir = os.path.join(os.path.dirname(__file__), "data/config")
    # def cpath(n):
    #     return os.path.join(crfConfigDir, n)

    # smEyeColor = HybridJaccard(ref_path=cpath("eyeColor_reference_wiki.txt"),
    #                            config_path=cpath("eyeColor_config.txt"))
    # smHairColor = HybridJaccard(ref_path=cpath("hairColor_reference_wiki.txt"),
    #                             config_path=cpath("hairColor_config.txt"))
    # print smEyeColor, smHairColor

    if location == "hdfs":
        if deleteFirst:
            namenode = "memex-nn1"
            port = 8020
            client = Client(namenode, 8020, use_trash=True)
            try:
                for deleted in client.delete([outputDirectory], recurse=True):
                    print deleted
            except FileNotFoundException as e:
                pass

    # hypothesis1: data fetched this way prompts the lzo compression error
    # hypothesis2: but it doesn't matter, error is just a warning
    rdd_crfl = sc.textFile(inputFilename)
    rdd_crfl.setName('rdd_crfl')

    if limit:
        rdd_crfl = sc.parallelize(rdd_crfl.take(limit))
    if partitions:
        rdd_crfl = rdd_crfl.repartition(partitions)

    rdd_final = rdd_crfl
    print outputFormat
    if outputFormat == "sequence":
        rdd_final.saveAsSequenceFile(outputDirectory)
    elif outputFormat == "text":
        print "saving to %s" % outputDirectory
        rdd_final.saveAsTextFile(outputDirectory)
    else:
        raise RuntimeError("Unrecognized output format: %s" % outputFormat)

Beispiel #4

0

Datei anzeigen

Datei: update_profiles.py Projekt: kevangel79/argo-streaming

class HdfsReader:
    """
    HdfsReader class

    Connects to an hdfs endpoint (namenode) and checks argo profile files stored there
    Uses a specific base path for determining argo file destinations
    """
    def __init__(self, namenode, port, base_path):
        """
        Initialized HdfsReader which is used to check/read profile files from hdfs
        Args:
            namenode: str. hdfs namenode host
            port: int. hdfs namenode port
            base_path: str. base path to  destination used for argo
        """
        self.client = Client(namenode, port)
        self.base_path = base_path

    def gen_profile_path(self, tenant, report, profile_type):
        """
        Generates a valid hdfs path to a specific profile
        Args:
            tenant: str. tenant to be used
            report: str. report to be used
            profile_type: str. profile_type (operations|reports|aggregations|thresholds)

        Returns:
            str: hdfs path

        """
        templates = dict()
        templates.update({
            'operations': '{0}_ops.json',
            'aggregations': '{0}_{1}_ap.json',
            'reports': '{0}_{1}_cfg.json',
            'thresholds': '{0}_{1}_thresholds.json',
            'recomputations': 'recomp.json'
        })

        sync_path = self.base_path.replace("{{tenant}}", tenant)
        filename = templates[profile_type].format(tenant, report)
        return os.path.join(sync_path, filename)

    def cat(self, tenant, report, profile_type):
        """
        Returns the contents of a profile stored in hdfs
        Args:
            tenant: str. tenant name
            report: str. report name
            profile_type: str. profile type (operations|reports|aggregations|thresholds)

        Returns:

        """
        path = self.gen_profile_path(tenant, report, profile_type)
        try:
            txt = self.client.cat([path])
            j = json.loads(txt.next().next())
            return j, True
        except FileNotFoundException:
            return None, False

    def rem(self, tenant, report, profile_type):
        """
        Removes a profile file that already exists in hdfs (in order to be replaced)
        Args:
            tenant: str. tenant name
            report: str. report name
            profile_type: str. profile type (operations|reports|aggregations|thresholds)

        Returns:

        """
        path = self.gen_profile_path(tenant, report, profile_type)

        try:
            self.client.delete([path]).next()
            return True
        except FileNotFoundException:
            return False

Beispiel #5

0

Datei anzeigen

    swift_client = swift.Connection(user=swift_user,
                                    key=swift_key,
                                    authurl=swift_authurl)

# read list of files
src_files = []

if run_mode == "hdfs":
    # spotify's snakebite as hdfs client
    src_files = [
        hdfs_url + files['path'] for files in hdfs_client.ls([source_files])
    ]

    # deleting output directory if exists
    if (hdfs_client.test(target_dir, exists=True, directory=True)):
        hdfs_client.delete(target_dir)
        hdfs_client.rmdir(target_dir)

elif run_mode == "swift":
    # read list of files from swift  src_files = []
    src_file_regex = re.compile(source_files)
    for data in swift_client.get_container(source_dir)[1]:
        if src_file_regex.match(data['name']):
            src_files.append(data['name'])

    src_files.sort(key=lambda x: os.path.basename(x))

else:
    # read list of files from local
    src_files = filter(os.path.isfile,
                       glob.glob(os.path.join(source_dir, source_files)))

Beispiel #6

0

Datei anzeigen

Datei: delete.py Projekt: bopopescu/dev-1

from snakebite.client import Client
from constants import *

client = Client('localhost', NAMENODE_PORT)

for p in client.delete(['/foo/bar','/input'], recurse=True):
    print p

Beispiel #7

0

Datei anzeigen

def get_json(request):

    response_data = {}

    if request.method == "GET":

        r = request.GET
        rg = request.GET.get

        ### Фильтр по городу улице дому
        if r.has_key("action") and rg("action") == 'filter-addresslist':

            city = request.GET["city"].strip()
            street = request.GET["street"].strip()
            house = request.GET["house"].strip()

            if city != "" or street != "" or house != "":
                request.session["filter_addresslist"] = pickle.dumps({
                    'city':
                    city,
                    'street':
                    street,
                    'house':
                    house
                })

            if city == "" and street == "" and house == "":
                if request.session.has_key("filter_addresslist"):
                    del request.session["filter_addresslist"]

            response_data = {"result": "ok"}

        ### Очистка фильтр по городу улице дому
        if r.has_key("action") and rg("action") == 'filter-addresslist-clear':

            if request.session.has_key("filter_addresslist"):
                del request.session["filter_addresslist"]

            response_data = {"result": "ok"}

        ### Фильтр по городу улице дому по компании
        if r.has_key("action") and rg("action") == 'filter-company':

            city = request.GET["city"].strip()
            street = request.GET["street"].strip()
            house = request.GET["house"].strip()
            company = request.GET["company"].strip()

            request.session["filter_company"] = pickle.dumps({
                'city': city,
                'street': street,
                'house': house,
                'company': company
            })

            if city == "" and street == "" and house == "" and company == "":
                if request.session.has_key("filter_company"):
                    del request.session["filter_company"]

            response_data = {"result": "ok"}

        ### Очистка фильтр по городу улице дому по компании
        if r.has_key("action") and rg("action") == 'filter-company-clear':

            if request.session.has_key("filter_company"):
                del request.session["filter_company"]

            response_data = {"result": "ok"}

        ### Фильтр по договорам
        if r.has_key("action") and rg("action") == 'filter-contract':

            inn = request.GET["inn"].strip()
            manager = request.GET["manager"].strip()
            company = request.GET["company"].strip()

            request.session["filter_contract"] = pickle.dumps({
                'manager':
                manager,
                'inn':
                inn,
                'company':
                company
            })

            if inn == "" and company == "" and manager == "":
                if request.session.has_key("filter_contract"):
                    del request.session["filter_contract"]

            response_data = {"result": "ok"}

        ### Очистка фильтр по договорам
        if r.has_key("action") and rg("action") == 'filter-contract-clear':

            if request.session.has_key("filter_contract"):
                del request.session["filter_contract"]

            response_data = {"result": "ok"}

        ### Cписок логов карточки компании
        if r.has_key("action") and rg("action") == 'get-company-list-logs':
            company_id = request.GET["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))
            log_list = []
            for row in comments_logs.objects.filter(
                    manager=company, log=True).order_by("-datetime_create"):
                log_list.append({
                    "comment":
                    row.comment,
                    "user":
                    row.user.get_full_name(),
                    "date":
                    row.datetime_create.strftime("%d.%m.%Y")
                })

            response_data = {"result": "ok", "data": log_list}

        ### Карточка компании: список коментариев
        if r.has_key("action") and rg("action") == 'get-company-list-comments':
            company_id = request.GET["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))
            comment_list = []
            for row in comments_logs.objects.filter(
                    manager=company, log=False).order_by("-datetime_create"):
                comment_list.append({
                    "comment":
                    row.comment,
                    "user":
                    row.user.get_full_name(),
                    "date":
                    row.datetime_create.strftime("%d.%m.%Y")
                })

            response_data = {"result": "ok", "data": comment_list}

        ### Карточка компании: список договоров
        if r.has_key("action") and rg(
                "action") == 'get-company-list-contracts':
            company_id = request.GET["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))
            contract_list = []
            for row in contracts.objects.filter(
                    company=company).order_by("-datetime_create"):
                contract_list.append({
                    "contract_id":
                    row.id,
                    "num":
                    row.num,
                    "date_begin":
                    row.date_begin.strftime("%d.%m.%Y"),
                    "date_end":
                    row.date_end.strftime("%d.%m.%Y"),
                    "goon":
                    u"Да" if row.goon else u"Нет",
                    "money":
                    "%.2f" % row.money,
                    "period":
                    row.period.name,
                    "manager":
                    row.manager.get_full_name(),
                    "author":
                    row.user.get_full_name(),
                    "create":
                    row.datetime_create.strftime("%d.%m.%Y"),
                    "comment":
                    row.comment
                })

            response_data = {"result": "ok", "data": contract_list}

        ### Карточка компании: данные по одному договору
        if r.has_key("action") and rg("action") == 'get-company-contract-one':
            contract_id = request.GET["contract-id"]
            contract = contracts.objects.get(pk=int(contract_id, 10))

            rec = {
                "contract_id": contract.id,
                "num": contract.num,
                "date_begin": contract.date_begin.strftime("%d.%m.%Y"),
                "date_end": contract.date_end.strftime("%d.%m.%Y"),
                "goon": "yes" if contract.goon else "no",
                "money": "%.2f" % contract.money,
                "period": contract.period.id,
                "manager": contract.manager.id,
                "comment": contract.comment
            }

            response_data = {"result": "ok", "rec": rec}

        ### Удаление договора
        if r.has_key("action") and rg("action") == 'contract-delete':
            contract_id = request.GET["contract_id"]
            contract = contracts.objects.get(pk=int(contract_id, 10))
            company_id = request.GET["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))
            comments_logs.objects.create(
                manager=company,
                user=request.user,
                comment=u"Удален договор {num} ({author} {create})".format(
                    num=contract.num,
                    author=contract.user.get_full_name(),
                    create=contract.datetime_create.strftime("%d.%m.%Y")),
                log=True)
            contract.delete()

            response_data = {"result": "ok"}

        ### Карточка компании: список загруженных файлов
        if r.has_key("action") and rg(
                "action") == 'get-company-list-hdfs-files':
            company_id = request.GET["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))
            file_list = []
            for row in files.objects.filter(
                    company=company).order_by("-datetime_load"):
                file_list.append({
                    "file_id":
                    row.id,
                    "filename":
                    row.filename,
                    "author":
                    row.user.get_full_name(),
                    "create":
                    row.datetime_load.strftime("%d.%m.%Y")
                })

            response_data = {"result": "ok", "data": file_list}

        ### Карточка компании: удаление загруженного файла
        if r.has_key("action") and rg("action") == 'company-file-delete':
            file_id = request.GET["file_id"]
            company_id = request.GET["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            fob = files.objects.get(pk=file_id)

            comments_logs.objects.create(
                manager=company,
                user=request.user,
                comment=u"Удален файл {filename}".format(
                    filename=fob.filename),
                log=True)

            fob.delete()

            client = Client('10.6.0.135', 9000)
            for x in client.delete([
                    '/blocks/%s' % file_id,
            ], recurse=True):
                print x

            response_data = {"result": "ok"}

        #### Поиск по компании
        if r.has_key("term") and rg("term") != "":
            term = request.GET["term"]
            obj = []

            data = block_managers.objects.filter(
                Q(name__icontains=term) | Q(inn__icontains=term))

            for row in data:

                obj.append({
                    "label":
                    u"{name} (ИНН {inn})".format(name=row.name, inn=row.inn),
                    "value":
                    row.id
                })

            response_data = obj

        ### Cписок логов карточки дома
        if r.has_key("action") and rg("action") == 'get-house-list-logs':
            house_id = request.GET["house"]
            house = buildings.objects.get(pk=int(house_id, 10))
            log_list = []
            for row in comments_logs.objects.filter(
                    house=house, log=True).order_by("-datetime_create"):
                log_list.append({
                    "comment":
                    row.comment,
                    "user":
                    row.user.get_full_name(),
                    "date":
                    row.datetime_create.strftime("%d.%m.%Y")
                })

            response_data = {"result": "ok", "data": log_list}

        ### Карточка дома: список коментариев
        if r.has_key("action") and rg("action") == 'get-house-list-comments':
            house_id = request.GET["house"]
            house = buildings.objects.get(pk=int(house_id, 10))
            comment_list = []
            for row in comments_logs.objects.filter(
                    house=house, log=False).order_by("-datetime_create"):
                comment_list.append({
                    "comment":
                    row.comment,
                    "user":
                    row.user.get_full_name(),
                    "date":
                    row.datetime_create.strftime("%d.%m.%Y")
                })

            response_data = {"result": "ok", "data": comment_list}

    if request.method == "POST":

        data = eval(request.body)

        ### Сохранение карточки компании
        if data.has_key("action") and data["action"] == 'company-common-save':

            company_id = data["company_id"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            address_id = data["address"]
            address_law_id = data["address_law"]

            address = address_house.objects.get(pk=int(address_id, 10))
            address_law = address_house.objects.get(pk=int(address_law_id, 10))

            name = data["name"].strip()
            inn = data["inn"].strip()
            phone = data["phone"].strip()
            email = data["email"].strip()
            contact = data["contact"].strip()

            company.name = name
            company.inn = inn
            company.phone = phone
            company.email = email
            company.contact = contact
            company.address = address
            company.address_law = address_law

            company.save()

            comments_logs.objects.create(
                manager=company,
                user=request.user,
                comment=u"Сохранены данные карточки компании",
                log=True)

            response_data = {"result": "ok"}

        ### Добавление компании
        if data.has_key(
                "action") and data["action"] == 'company-common-create':

            id_www = int(data["id_www"].strip(), 10)

            address_id = data["address"]
            address_law_id = data["address_law"]

            address = address_house.objects.get(pk=int(address_id, 10))
            address_law = address_house.objects.get(pk=int(address_law_id, 10))

            name = data["name"].strip()
            inn = data["inn"].strip()
            phone = data["phone"].strip()
            email = data["email"].strip()
            contact = data["contact"].strip()

            ### Проверка есть ли уже такой id_www
            if not block_managers.objects.filter(www_id=id_www).exists(
            ) and not block_managers.objects.filter(inn=inn).exists():

                new = block_managers.objects.create(www_id=id_www,
                                                    name=name,
                                                    inn=inn,
                                                    phone=phone,
                                                    email=email,
                                                    contact=contact,
                                                    address=address,
                                                    address_law=address_law)

                comments_logs.objects.create(
                    manager=new,
                    user=request.user,
                    comment=u"Создана карточка компании",
                    log=True)

                response_data = {"result": "ok", "id": new.id}

            else:

                response_data = {"result": "error"}

        ### Добавление коментария компании
        if data.has_key("action") and data["action"] == 'company-comment-add':

            company_id = data["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            comment = data["comment"].strip()

            if comment != "":

                comments_logs.objects.create(manager=company,
                                             user=request.user,
                                             comment=comment)

            response_data = {"result": "ok"}

        ### Добавление договора компании
        if data.has_key("action") and data["action"] == 'contract-create':

            company_id = data["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            contracts.objects.create(
                company=company,
                num=data["num"].strip(),
                date_begin=datetime.datetime.strptime(data["date_begin"],
                                                      "%d.%m.%Y"),
                date_end=datetime.datetime.strptime(data["date_end"],
                                                    "%d.%m.%Y"),
                goon=True if data["goon"] == "yes" else False,
                money=Decimal(data["money"]),
                period=pay_period.objects.get(pk=int(data["period"], 10)),
                manager=User.objects.get(pk=int(data["manager"], 10)),
                user=request.user,
                comment=data["comment"].strip())

            response_data = {"result": "ok"}

        ### Сохранение договора компании
        if data.has_key("action") and data["action"] == 'contract-edit':

            company_id = data["company"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            contract_id = data["contract_id"]
            contract = contracts.objects.get(pk=int(contract_id, 10))

            contract.num = data["num"].strip()
            contract.date_begin = datetime.datetime.strptime(
                data["date_begin"], "%d.%m.%Y")
            contract.date_end = datetime.datetime.strptime(
                data["date_end"], "%d.%m.%Y")
            contract.goon = True if data["goon"] == "yes" else False
            contract.money = Decimal(data["money"])
            contract.period = pay_period.objects.get(
                pk=int(data["period"], 10))
            contract.manager = User.objects.get(pk=int(data["manager"], 10))
            contract.comment = data["comment"].strip()
            contract.save()

            comments_logs.objects.create(
                manager=company,
                user=request.user,
                comment=u"Сохранены данные договора {num} ({author} {create})".
                format(num=contract.num,
                       author=contract.user.get_full_name(),
                       create=contract.datetime_create.strftime("%d.%m.%Y")),
                log=True)

            response_data = {"result": "ok"}

        ### Сохранение карточки дома
        if data.has_key("action") and data["action"] == 'house-common-save':

            house_id = data["house"]
            house = buildings.objects.get(pk=int(house_id, 10))

            address_id = data["address"]
            address = address_house.objects.get(pk=int(address_id, 10))

            company_id = data["manager"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            numstoreys = data["numstoreys"].strip()
            numentrances = data["numentrances"].strip()
            numfloars = data["numfloars"].strip()
            access = data["access"].strip()

            house.numstoreys = numstoreys
            house.numentrances = numentrances
            house.numfloars = numfloars
            house.access = access
            house.address = address
            house.block_manager = company

            house.save()

            comments_logs.objects.create(
                house=house,
                user=request.user,
                comment=u"Сохранены данные карточки дома",
                log=True)

            response_data = {"result": "ok"}

        ### Создание карточки дома
        if data.has_key("action") and data["action"] == 'house-common-create':

            id_www = int(data["id_www"].strip(), 10)

            address_id = data["address"]
            address = address_house.objects.get(pk=int(address_id, 10))

            company_id = data["manager"]
            company = block_managers.objects.get(pk=int(company_id, 10))

            numstoreys = int(data["numstoreys"].strip(), 10)
            numentrances = int(data["numentrances"].strip(), 10)
            numfloars = int(data["numfloars"].strip(), 10)
            access = data["access"].strip()

            ### Проверка уникальности www_id
            if not block_managers.objects.filter(www_id=id_www).exists():

                new = buildings.objects.create(www_id=id_www,
                                               numstoreys=numstoreys,
                                               numentrances=numentrances,
                                               numfloars=numfloars,
                                               access=access,
                                               address=address,
                                               block_manager=company)

                comments_logs.objects.create(house=new,
                                             user=request.user,
                                             comment=u"Создана карточка дома",
                                             log=True)

                response_data = {"result": "ok", "id": new.id}

            else:
                response_data = {"result": "error"}

        ### Добавление коментария дома
        if data.has_key("action") and data["action"] == 'house-comment-add':

            house_id = data["house"]
            house = buildings.objects.get(pk=int(house_id, 10))

            comment = data["comment"].strip()

            if comment != "":

                comments_logs.objects.create(house=house,
                                             user=request.user,
                                             comment=comment)

            response_data = {"result": "ok"}

    response = HttpResponse(json.dumps(response_data),
                            content_type="application/json")
    response['Access-Control-Allow-Origin'] = "*"
    return response

Beispiel #8

0

Datei anzeigen

Datei: hdfs.py Projekt: lexman/tuttle

 def remove(self):
     client = Client(self._host, self._port, effective_user=self._user, use_trash=False)
     it = client.delete([self._partial], recurse=True)
     for elmt in it:
         pass

Beispiel #9

0

Datei anzeigen

def delete():
	client = Client("study", 9000, use_trash=False)
	client.delete(["/data/gz"], recurse=False)

Beispiel #10

0

Datei anzeigen

Datei: crfalign.py Projekt: cjsanjay/dig-crf

def crfalign(sc, inputFilename, outputDirectory, 
            limit=LIMIT, location='hdfs', outputFormat="text", partitions=None, deleteFirst=True):

    crfConfigDir = os.path.join(os.path.dirname(__file__), "data/config")
    def cpath(n):
        return os.path.join(crfConfigDir, n)

    smEyeColor = HybridJaccard(ref_path=cpath("eyeColor_reference_wiki.txt"),
                               config_path=cpath("eyeColor_config.txt"))
    smHairColor = HybridJaccard(ref_path=cpath("hairColor_reference_wiki.txt"),
                                config_path=cpath("hairColor_config.txt"))
    print smEyeColor, smHairColor

    if location == "hdfs":
        if deleteFirst:
            namenode = "memex-nn1"
            port = 8020
            client = Client(namenode, 8020, use_trash=True)
            try:
                for deleted in client.delete([outputDirectory], recurse=True):
                    print deleted
            except FileNotFoundException as e:
                pass

    # hypothesis1: data fetched this way prompts the lzo compression error
    # hypothesis2: but it doesn't matter, error is just a warning
    if partitions:
        if limit:
            rdd_crfl = sc.parallelize(rdd_crfl.take(limit))
            rdd_crfl = rdd_crfl.repartition(partitions)
        else:
            print inputFilename
            rdd_crfl = sc.textFile(inputFilename, minPartitions=partitions)
    else:
        rdd_crfl = sc.textFile(inputFilename)
    rdd_crfl.setName('rdd_crfl')
    # rdd_crfl.persist()
    print "beginning: %s partitions" % rdd_crfl.getNumPartitions()

    # "value-only" RDD, not a pair RDD
    # but we have the URI in the -3 position
    # and the index in the -2 position
    rdd_withuri = rdd_crfl.map(lambda x: reconstructTuple(x))

    # Note: groupByKey returns iterable, not data; so no point in printing
    rdd_grouped = rdd_withuri.groupByKey()
    # sort the vectors by index (within key groups)
    rdd_sorted = rdd_grouped.mapValues(lambda x: [l[1:] for l in sorted(x, key=lambda r: int(r[0]))])
    # find all contiguous spans of marked-up tokens
    # returns 0 or more dicts per URI key
    rdd_spans = rdd_sorted.mapValues(lambda x: computeSpans(x, indexed=True))
    # flatten to (URI, single dict) on each line
    rdd_flat = rdd_spans.flatMapValues(lambda x: list(x))
    # rdd_flat = rdd_flat.coalesce(rdd_flat.getNumPartitions() / 3)
    # # map any eyeColor spans using smEyeColor, hairType spans using smHairColor
    # rdd_aligned = rdd_flat.mapValues(lambda x: alignToControlledVocab(x, {"eyeColor": smEyeColor, "hairType": smHairColor}))
    rdd_aligned = rdd_flat.mapValues(lambda x: alignToControlledVocab(x, {"eyeColor": smEyeColor.findBestMatch, "hairType": smHairColor.findBestMatch}))
    # rdd_aligned = rdd_flat.mapValues(lambda x: alignToControlledVocab(x, {"eyeColor": fakeFindBestMatch, "hairType": fakeFindBestMatch}))
    # rdd_aligned = rdd_flat.mapValues(lambda x: alignToControlledVocab(x, {}))
    # rdd_aligned = rdd_spans

    # rdd_final = rdd_crfl
    rdd_final = rdd_aligned
    print outputFormat
    if outputFormat == "sequence":
        rdd_final.saveAsSequenceFile(outputDirectory)
    elif outputFormat == "text":
        print "saving to %s" % outputDirectory
        rdd_final.saveAsTextFile(outputDirectory)
    else:
        raise RuntimeError("Unrecognized output format: %s" % outputFormat)

Beispiel #11

0

Datei anzeigen

Datei: 1.py Projekt: behrouzmadahian/python

from snakebite.client import Client
client = Client('localhost', 8020)  #port is the RPC port of the namenode.
for i in client.ls(['/user/cloudera/behrouz']):  #takes a list of paths!!
    print i
#get this parameters from /etc/hadoop/conf/core-site.xml under the fs.defaults
#many of the methods in snake bite return generators

#creating a directory:
#create two directories behrouz, behrouz1/b1 on HDFS:
print '*' * 40
for p in client.mkdir(['/behrouz', 'behrouz1/b1'], create_parent=True):
    print p
print '*' * 40
#deleting files and directories: deletes any subdirectories and files a directory contains
#recursively deleting the directories!
for p in client.delete(['/behrouz', 'behrouz1/b1'], recurse=True):
    print p
print '*' * 40
# retrieving data from hdfs:
#copying files from HDFS to Local file system:
for f in client.copyToLocal(['/user/cloudera/wordCount.out'],
                            '/home/cloudera/'):
    print f
print '*' * 40
#######
#reading contents of a file
for l in client.text(['/user/cloudera/testfile.txt']):
    print l
#the text method automatically decompress and display gzip and bzip2 files.

Beispiel #12

0

Datei anzeigen

Datei: del_dir.py Projekt: gauravparashar/symbiosis

from snakebite.client import Client
client = Client('localhost', 8020)
for p in client.delete(['/user/hadoop/test'], recurse=True):
    print(p)

Beispiel #13

0

Datei anzeigen

{'group': u'supergroup', 'permission': 420, 'file_type': 'f', 'access_time': 1605964109596L, 'block_replication': 2, 'modification_time': 1605946691680L, 'length': 19L, 'blocksize': 134217728L, 'owner': u'student9_7', 'path': '/student9_7/test'}
{'group': u'supergroup', 'permission': 420, 'file_type': 'f', 'access_time': 1605964267111L, 'block_replication': 3, 'modification_time': 1605964267975L, 'length': 19L, 'blocksize': 134217728L, 'owner': u'student9_7', 'path': '/student9_7/test2'}
{'group': u'supergroup', 'permission': 493, 'file_type': 'd', 'access_time': 0L, 'block_replication': 0, 'modification_time': 1605950057832L, 'length': 0L, 'blocksize': 0L, 'owner': u'student9_7', 'path': '/student9_7/testdir'}
'''

# Создадим пару директорий
for p in client.mkdir(['/student9_7/py_dir_01', '/student9_7/py_dir_02'],
                      create_parent=True):
    print(p)
'''
{'path': '/student9_7/py_dir_01', 'result': True}
{'path': '/student9_7/py_dir_02', 'result': True}
'''

# Удалим директорию `py_dir_01`
for p in client.delete(['/student9_7/py_dir_01'], recurse=True):
    print(p)
'''
{'path': '/student9_7/py_dir_01', 'result': True}
'''

# Посмотрим что содержится в файле `test`
for t in client.text(['/student9_7/test']):
    print(t)
'''
test file for hdfs
'''

# Скопируем файл `test` из хранилища в локальную домашнюю директорию под именем `retrived_file_via_py`
for f in client.copyToLocal(['/student9_7/test'], 'retrived_file_via_py'):
    print(f)

Beispiel #14

0

Datei anzeigen

#!/usr/bin/env python
from snakebite.client import Client

client = Client('localhost', 9000)

# recurse=True is equivalent to rm -rf so be careful!
for p in client.delete(['/foo', '/another'], recurse=True):
    print p

Beispiel #15

0

Datei anzeigen

Datei: wiki_latest_page_trends.py Projekt: RajeshThallam/MIDS-W251-FINAL-PROJECT

  swift_client = swift.Connection(
    user = swift_user, 
    key = swift_key, 
    authurl = swift_authurl)

# read list of files
src_files = []

if run_mode == "hdfs":
  # spotify's snakebite as hdfs client
  src_files = [ hdfs_url + files['path'] for files in hdfs_client.ls([source_files]) ]

  # deleting output directory if exists
  if (hdfs_client.test(target_dir, exists = True, directory = True)):
    hdfs_client.delete(target_dir)
    hdfs_client.rmdir(target_dir)

elif run_mode == "swift":  
  # read list of files from swift  src_files = []
  source_files = '|'.join([ '(pagecounts-' + (datetime.now() - timedelta(hours=i)).strftime("%Y%m%d-%H") + '(.*))' for i in range(48, 71) ])
  src_file_regex = re.compile(source_files)
  for data in swift_client.get_container(source_dir)[1]:
     if src_file_regex.match(data['name']):
       src_files.append(data['name'])
  
  src_files.sort(key = lambda x: os.path.basename(x))

else:
  # read list of files from local
  src_files = filter(os.path.isfile, glob.glob(os.path.join(source_dir, source_files)))