def __getJson(self):
     """Read the stored ``imsmanifest.xml`` payload and collect its title and TOC.

     Looks up the storage object named by the ``oid`` form field, parses the
     manifest XML and stores two entries in ``rvtMap``: ``"title"`` (title text
     of the default organization) and ``"toc"`` (whatever
     ``self.__getJsonItems`` builds from that organization's items).

     NOTE(review): the outer ``try`` has no ``except``/``finally`` and nothing
     is returned in the lines visible here -- the definition appears to be
     truncated. ``data`` (used in the error handler) is not defined in this
     block either.
     """
     rvtMap = JsonObject()
     try:
         oid = self.vc("formData").get("oid")
         object = Services.storage.getObject(oid)
         payload = object.getPayload("imsmanifest.xml")
         try:
             from xml.etree import ElementTree
             xmlStr = IOUtils.toString(payload.open(), "UTF-8")
             payload.close()
             xml = ElementTree.XML(xmlStr.encode("UTF-8"))
             # Namespace prefix "{uri}" cut from the root tag; empty when the
             # tag contains no "}" (find() returns -1, so the slice is [:0]).
             ns = xml.tag[:xml.tag.find("}")+1]
             # Map each resource identifier to its href.
             resources = {}
             for res in xml.findall(ns+"resources/"+ns+"resource"):
                 resources[res.attrib.get("identifier")] = res.attrib.get("href")
             organizations = xml.find(ns+"organizations")
             defaultName = organizations.attrib.get("default")
             organizations = organizations.findall(ns+"organization")
             # Keep only the organization whose identifier equals the
             # "default" attribute of <organizations>.
             organizations = [o for o in organizations if o.attrib.get("identifier")==defaultName]
             # IndexError here (no matching organization) is caught below.
             organization = organizations[0]
             title = organization.find(ns+"title").text
             rvtMap.put("title", title)
             items = organization.findall(ns+"item")
             rvtMap.put("toc", self.__getJsonItems(ns, items, resources))
         except Exception, e:
              # NOTE(review): `data` is not defined anywhere in this block.
              data["error"] = "Error - %s" % str(e)
              print data["error"]
         # Reached after the inner try/except, whether or not parsing failed.
         object.close()
    def process(self, inputStream, outputStream):
        """Filter a CSV stream, blanking readings that are out of range.

        The first row is treated as the header and copied through unchanged.
        Every later row is kept only when its ``DATE`` column parses as
        ``%Y-%m-%d %H:%M:%S``. In kept rows, each non-empty value from column 1
        onwards that lies outside ``[LB, UB]`` is replaced by an empty string.

        ``DATE``, ``LB`` and ``UB`` are not defined in this block; they are
        expected to come from the enclosing script.

        :param inputStream: stream holding the UTF-8 encoded CSV text.
        :param outputStream: stream that receives the filtered CSV rows.
        :raises ValueError: when a non-empty value cannot be parsed as a float.
        """
        text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        csv_reader = csv.reader(StringIO(text), dialect='excel', delimiter=',')

        isHeader = True
        for row in csv_reader:
            if isHeader:
                isHeader = False
                # NOTE(review): the "\n\r" terminator is kept as found; confirm
                # whether "\r\n" was intended.
                outputStream.write(",".join(row) + "\n\r")
                continue

            try:
                datetime.datetime.strptime(row[DATE], '%Y-%m-%d %H:%M:%S')
            except (ValueError, IndexError):
                # ValueError: the column is not a timestamp.
                # IndexError: blank or short row (csv yields [] for empty
                # lines), which previously aborted the whole stream.
                continue

            for index in range(1, len(row)):
                if row[index] == '':
                    continue
                reading = float(row[index])
                if reading < LB or reading > UB:
                    # out of range
                    row[index] = ''

            outputStream.write(",".join(row) + "\n\r")
Esempio n. 3
0
        def process(self, inputStream, outputStream):
            """Pass the incoming JSON document through ``format_geotypes``.

            The content is decoded as UTF-8, parsed as JSON, handed to
            ``format_geotypes`` together with ``GEOPOINTS`` and the result is
            written back as UTF-8 JSON indented by four spaces.
            """
            raw_json = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
            converted = format_geotypes(json.loads(raw_json), GEOPOINTS)
            encoded = json.dumps(converted, indent=4).encode('utf-8')
            outputStream.write(bytearray(encoded))
Esempio n. 4
0
 def process(self, inputStream):
     """Post the incoming content to the scoring API and emit a scored flowfile.

     The content (with CR/LF characters removed) is wrapped as
     ``{"image":"..."}``, sent with ``curl`` and also written to a new child
     flowfile. The ``prediction_label`` and ``prediction_prob`` fields of the
     response are attached to that flowfile as string attributes before it is
     transferred to ``REL_SUCCESS``.

     :param inputStream: stream holding the UTF-8 encoded input content.
     """
     splitFlowFile = session.create(self.parentFlowFile)
     writeCallback = WriteCallback()

     # To read content as a string:
     data = IOUtils.toString(inputStream, StandardCharsets.UTF_8)

     # json.dumps escapes quotes/backslashes that plain concatenation would
     # let through; compact separators keep the former '{"image":"..."}' shape.
     request_body = json.dumps({'image': re.sub('(\r|\n)', '', data)},
                               separators=(',', ':'))

     curl_input = ['curl', '-i', '-k', '-X', 'POST',
                   'http://dzaratsian80.field.hortonworks.com:4444/api',
                   '-d', request_body,
                   '-H', 'content-type:application/json']

     result = subprocess.Popen(curl_input, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     out, err = result.communicate()

     # "-i" makes curl print the response headers first; the JSON body is the
     # last CRLF-separated chunk. Parse it once and reuse it.
     response = json.loads(out.split('\r\n')[-1])

     writeCallback.content = request_body
     splitFlowFile = session.write(splitFlowFile, writeCallback)
     splitFlowFile = session.putAllAttributes(splitFlowFile, {
             'prediction_label': str(response['prediction_label']),
             'prediction_prob':  str(response['prediction_prob'])
         })

     session.transfer(splitFlowFile, REL_SUCCESS)
Esempio n. 5
0
 def __getJson(self):
     """Read the stored ``imsmanifest.xml`` payload and collect its title and TOC.

     Looks up the storage object named by ``formData.get("oid")``, parses the
     manifest XML and stores two entries in ``rvtMap``: ``"title"`` (title text
     of the default organization) and ``"toc"`` (whatever
     ``self.__getJsonItems`` builds from that organization's items).

     NOTE(review): the outer ``try`` has no ``except``/``finally`` and nothing
     is returned in the lines visible here -- the definition appears to be
     truncated. ``formData`` and ``data`` are not defined in this block.
     """
     rvtMap = HashMap()
     try:
         oid = formData.get("oid")
         object = Services.storage.getObject(oid)
         payload = object.getPayload("imsmanifest.xml")
         try:
             from xml.etree import ElementTree
             xmlStr = IOUtils.toString(payload.open(), "UTF-8")
             payload.close()
             xml = ElementTree.XML(xmlStr.encode("UTF-8"))
             # Namespace prefix "{uri}" cut from the root tag; empty when the
             # tag contains no "}" (find() returns -1, so the slice is [:0]).
             ns = xml.tag[:xml.tag.find("}")+1]
             # Map each resource identifier to its href.
             resources = {}
             for res in xml.findall(ns+"resources/"+ns+"resource"):
                 resources[res.attrib.get("identifier")] = res.attrib.get("href")
             organizations = xml.find(ns+"organizations")
             defaultName = organizations.attrib.get("default")
             organizations = organizations.findall(ns+"organization")
             # Keep only the organization whose identifier equals the
             # "default" attribute of <organizations>.
             organizations = [o for o in organizations if o.attrib.get("identifier")==defaultName]
             # IndexError here (no matching organization) is caught below.
             organization = organizations[0]
             title = organization.find(ns+"title").text
             rvtMap.put("title", title)
             items = organization.findall(ns+"item")
             rvtMap.put("toc", self.__getJsonItems(ns, items, resources))
         except Exception, e:
              # NOTE(review): `data` is not defined anywhere in this block.
              data["error"] = "Error - %s" % str(e)
              print data["error"]
         # Reached after the inner try/except, whether or not parsing failed.
         object.close()
Esempio n. 6
0
 def process(self, inputStream):
     """Transform the policy index content and route the results.

     The UTF-8 content and ``self._attrs`` are handed to
     ``Processor().transform_policy_index``. Its first result is transferred
     with ``_succ`` and its second with ``_fail``, both carrying the returned
     attributes.
     """
     policy_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
     outcome = Processor().transform_policy_index(policy_text, self._attrs)
     corrects, incorrects, attributes = outcome
     self._transfer(corrects, attributes, _succ)
     self._transfer(incorrects, attributes, _fail)
    def process(self, instream, outstream):
        """Write one randomly generated event record to ``outstream``.

        The record carries a timestamp-derived ``id``, a ``datetime``, a random
        US ``state``, a ``duration`` drawn from a triangular distribution and
        an ``action`` flag. It is written as the ``str()`` of the dict.
        """
        # The incoming content is read, but its value is not used afterwards.
        data = IOUtils.toString(instream, StandardCharsets.UTF_8)

        now = datetime.datetime.now()
        states = [
            'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
            'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
            'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
            'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
            'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
        ]
        actions = ['TRUE'] + ['FALSE'] * 5  # True/False ratio of 1:5

        # Keys are inserted in a fixed order and the random draws happen in
        # the same sequence: state, duration, action.
        output = {}
        output['id'] = now.strftime('%Y%m%d_%H%M%S')
        output['datetime'] = now.strftime('%Y-%m-%d %H:%M:%S')
        output['state'] = random.choice(states)
        output['duration'] = round(random.triangular(1, 150, 1), 2)
        output['action'] = random.choice(actions)

        # NOTE(review): this CSV rendering is built but never written; the
        # dict's str() form is what actually goes out.
        output_csv = ','.join(str(value) for value in output.values())

        outstream.write(str(output))
    def process(self, inputStream, outputStream):
        """Flatten a JSON document into one ``<path>{"key": value}`` line per leaf.

        Paths start at ``"PA"`` and grow by ``.key`` for nested objects and by
        ``.key_<n>`` for the n-th object/array element found inside an array.
        An array with no object/array elements is emitted whole as one line.
        The joined lines are written as UTF-8.
        """
        document = json.loads(
            IOUtils.toString(inputStream, StandardCharsets.UTF_8))
        lines = []

        def walk(prefix, node):
            # Collect the output lines for `node` under the path `prefix`.
            for key in node:
                value = node[key]
                if isinstance(value, dict):
                    walk(prefix + "." + key, value)
                elif isinstance(value, list):
                    nested = 0
                    for element in value:
                        if isinstance(element, (dict, list)):
                            nested += 1
                            walk(prefix + "." + key + "_" + str(nested),
                                 element)
                    if nested == 0:
                        # Only scalars inside: keep the array as one entry.
                        lines.append(prefix + json.dumps({key: value}) + "\n")
                else:
                    lines.append(prefix + json.dumps({key: value}) + "\n")

        walk("PA", document)

        outputStream.write(bytearray("".join(lines).encode('utf-8')))
Esempio n. 9
0
def process(flowfile):
  """Parse a flowfile with the parser module selected by its filename.

  The ``filename`` attribute picks one of the parser modules (``gas``,
  ``heartrate``, ``power``). The module is (re)loaded from ``parser_path``
  when it is not loaded yet or its file is newer than the recorded load time.
  The content is read as bytes when the module declares ``format == 'binary'``
  and as UTF-8 text otherwise. Any attributes the module lists in
  ``attributes`` are copied into the data dict. ``parse(data)`` produces the
  new content and the flowfile goes to ``REL_SUCCESS``. Every failure is
  reported through ``fail(flowfile, message)``.
  """
  fn = flowfile.getAttribute('filename')
  if 'ethylene' in fn: parser = 'gas'
  elif 'hr' in fn: parser = 'heartrate'
  elif 'power' in fn: parser = 'power'
  else:
    # An unmatched filename used to leave `parser` unbound and crash on the
    # next line; report it through the normal failure path instead.
    fail(flowfile, 'Loading Module: no parser matches filename %r' % (fn,))
    return
  path = parser_path + parser + '.py'
  # load the parser if it has been updated
  if parser not in sys.modules or os.path.getmtime(path) > sys.modules[parser].loaded_at:
    try:
      module = imp.load_source(parser, path)
      module.loaded_at = int(time.time())
    except:
      # Bare except kept on purpose: presumably this runs under Jython, where
      # it also traps Java exceptions -- confirm before narrowing.
      fail(flowfile, 'Loading Module: ' + traceback.format_exc())
      return
  parse_module = sys.modules[parser]

  # Read flowfile content
  data = {}
  instream = session.read(flowfile)
  try:
    if hasattr(parse_module, 'format') and parse_module.format.lower() == 'binary':
      data['content'] = IOUtils.toByteArray(instream)
    else:
      data['content'] = IOUtils.toString(instream, StandardCharsets.UTF_8)
  finally:
    # Release the stream even when the read itself fails.
    instream.close()

  # Attempt to parse
  try:
    if hasattr(parse_module, 'attributes'):
      for attribute in parse_module.attributes:
        data[attribute] = flowfile.getAttribute(attribute)
    result = parse_module.parse(data)
    flowfile = session.write(flowfile, PyStreamCallback(result))
    session.transfer(flowfile, REL_SUCCESS)
  except:
    fail(flowfile, 'Parsing: ' + traceback.format_exc())
    def process(self, inputStream):
        """Split a JSON array into one child flowfile per element.

        Each element is serialized back to JSON as the child's content. The
        child gets a ``fragment.index`` attribute (the element's position) and
        a ``color`` attribute taken from the element. Children are transferred
        to ``REL_SUCCESS`` only after all of them were created.

        :param inputStream: stream holding the UTF-8 encoded JSON array.
        :raises: any error is printed to stdout and re-raised.
        """
        try:
            # Read input FlowFile content
            input_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
            input_list = json.loads(input_text)

            # Create FlowFiles for array items
            splits = []
            for fragment_index, item in enumerate(input_list):
                color = item['color']
                splitFlowFile = session.create(self.parentFlowFile)
                writeCallback = WriteCallback()
                writeCallback.content = json.dumps(item)
                splitFlowFile = session.write(splitFlowFile, writeCallback)
                splitFlowFile = session.putAllAttributes(
                    splitFlowFile, {
                        # Attribute values are strings; an int here reaches
                        # the Java side as an Integer.
                        'fragment.index': str(fragment_index),
                        'color': color
                    })
                splits.append(splitFlowFile)
                log.info(color)

            for splitFlowFile in splits:
                session.transfer(splitFlowFile, REL_SUCCESS)
        except:
            traceback.print_exc(file=sys.stdout)
            raise
    def process(self, inputStream):
        """Load mapping data from the content and run the vindex transform.

        The stripped UTF-8 content is parsed as JSON and stored under
        ``self._attrs["mapping_data"]``. ``Processor().transform_vindex`` is
        then called with those attributes. Its first result is transferred
        with ``_succ`` and its second with ``_fail``.
        """
        raw_mapping = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        self._attrs["mapping_data"] = json.loads(raw_mapping.strip())

        outcome = Processor().transform_vindex(attrs=self._attrs)
        corrects, incorrects, attributes = outcome
        self._transfer(corrects, attributes, _succ)
        self._transfer(incorrects, attributes, _fail)
Esempio n. 12
0
        def process(self, inputStream, outputStream):
            """Replace the content with its ``flatten_nested_json`` form.

            The UTF-8 content is parsed as JSON, passed to
            ``flatten_nested_json`` and written back as UTF-8 JSON indented by
            four spaces.
            """
            source_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
            flattened = flatten_nested_json(json.loads(source_text))
            rendered = json.dumps(flattened, indent=4)
            outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 13
0
 def process(self, inputStream, outputStream):
     """Replace the content with a fixed two-field JSON object.

     The incoming content must still be valid JSON (it is parsed, and parsing
     errors propagate), but none of its values are used.
     """
     # Parsed only for validation; the result is not used.
     json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
     replacement = {"Source": "NiFi", "Dest": "Stuff"}
     rendered = json.dumps(replacement, indent=4)
     outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 14
0
 def process(self, inputStream, outputStream):
   """Extract the words tagged ``LOCATION`` from a CoreNLP-style JSON document.

   Walks ``sentences`` -> ``tokens`` and collects ``word`` for every token
   whose ``ner`` equals ``'LOCATION'``. The list is written as UTF-8 JSON
   indented by four spaces.
   """
   annotated = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
   locations = [
       token['word']
       for sentence in annotated['sentences']
       for token in sentence['tokens']
       if token['ner'] == 'LOCATION'
   ]
   rendered = json.dumps(locations, indent=4)
   outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 15
0
        def process(self, inputStream, outputStream):
            """Copy the content through and keep it on the global ``content``.

            The UTF-8 text is stored in ``content.stream`` and then written to
            ``outputStream`` unchanged, so the parent file keeps its original
            content.
            """
            global content
            content.stream = IOUtils.toString(inputStream, StandardCharsets.UTF_8)

            # Hook for changing the original file; currently a pass-through.
            outputStream.write(content.stream)
Esempio n. 16
0
 def process(self, inputStream, outputStream):
     """Replace the content with a summary of the incoming JSON object.

     The output object carries the fixed ``Source`` plus the input's ``uuid``
     (as ``ID``) and ``file_name``. It is written as UTF-8 JSON indented by
     four spaces.

     :raises KeyError: when ``uuid`` or ``file_name`` is missing.
     """
     text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
     obj = json.loads(text)
     newObj = {
         "Source": "NiFi",
         "ID": obj['uuid'],  # the comma was missing here (syntax error)
         "file_name": obj['file_name'],
     }
     outputStream.write(bytearray(json.dumps(newObj, indent=4).encode('utf-8')))
Esempio n. 17
0
 def process(self, inputStream, outputStream):
     """Reduce the JSON content to its ``_id`` field(s).

     An object becomes ``{"_id": ...}``, an array becomes a list of such
     objects, and anything else becomes the literal text ``none``.
     """
     parsed = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
     if isinstance(parsed, list):
         text = json.dumps([{"_id": entry.get("_id")} for entry in parsed])
     elif isinstance(parsed, dict):
         text = json.dumps({"_id": parsed.get("_id")})
     else:
         text = 'none'
     outputStream.write(bytearray(text.encode('utf-8')))
    def process(self, file):
        """Flag a file once if it contains a Luhn-valid payment card candidate.

        Unallocated/unused blocks and non-files are skipped. The content is
        read as UTF-8 text. When ``self.skipBinaries`` is set, content holding
        a NUL byte is skipped too. The first candidate that passes the Luhn
        check increments ``self.filesFound``, posts a
        ``TSK_INTERESTING_FILE_HIT`` artifact and sets ``self.fileFlagged``.

        :param file: the file handed over by the ingest framework.
        :return: always ``IngestModule.ProcessResult.OK``.
        """
        def luhnChecksumIsValid(cardNumber):
            # check to make sure that the card passes a luhn mod-10 checksum:
            # counting from the right, every second digit is doubled and
            # reduced by 9 when the doubling exceeds 9.
            total = 0
            for position, char in enumerate(reversed(cardNumber)):
                digit = int(char)
                if position % 2 == 1:
                    digit *= 2
                    if digit > 9:
                        digit -= 9
                total += digit
            return total % 10 == 0

        # Skip non-files
        fileType = file.getType()
        if (fileType == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS
                or fileType == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
                or not file.isFile()):
            return IngestModule.ProcessResult.OK

        inputStream = ReadContentInputStream(file)
        try:
            text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        finally:
            # The stream was previously never closed.
            inputStream.close()

        if self.skipBinaries and b'\x00' in text:
            return IngestModule.ProcessResult.OK

        self.fileFlagged = 0
        candidates = re.findall(r'[1-6](?:\d[ -]*?){13,23}', text,
                                re.IGNORECASE)
        for candidate in candidates:
            # Drop the spaces/dashes the pattern allows between digits.
            digits = re.sub(r'\D+', '', candidate)
            if not luhnChecksumIsValid(digits):
                continue
            self.filesFound += 1
            art = file.newArtifact(
                BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT)
            att = BlackboardAttribute(
                BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID(),
                PaymentCardFileIngestModuleFactory.moduleName,
                "Files With Possible Payment Card Numbers")
            art.addAttribute(att)
            IngestServices.getInstance().fireModuleDataEvent(
                ModuleDataEvent(
                    PaymentCardFileIngestModuleFactory.moduleName,
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT,
                    None))
            self.fileFlagged = 1
            # A file is flagged at most once, so the remaining candidates
            # need no checking.
            break
        return IngestModule.ProcessResult.OK
    def process(self, inputStream):
        """Score one comma-separated play record and emit the prediction.

        The first eight comma-separated fields of the content are posted with
        ``curl`` to the scoring endpoint, using the ``accessToken`` attribute
        as bearer token. The first returned record (minus its element at index
        8) becomes the CSV content of a new child flowfile, and its last
        element is stored in the ``predictions_yards_gained`` attribute.

        Scoring is best-effort: on any error the traceback is printed and the
        child flowfile is still transferred to ``REL_SUCCESS``.

        :param inputStream: stream holding the UTF-8 encoded CSV record.
        """
        splitFlowFile = session.create(self.parentFlowFile)
        writeCallback = WriteCallback()
        try:
            # To read content as a string:
            data = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
            fields = data.split(',')
            (down, qtr, month_day, playtype_lag_index, timesecs, ydsnet,
             ydstogo, yrdline100) = fields[:8]

            accessToken = splitFlowFile.getAttribute('accessToken')

            # Only PlayType_lag_index is sent as a quoted JSON string.
            record = ','.join([
                str(down), str(qtr), str(month_day),
                '"' + str(playtype_lag_index) + '"',
                str(timesecs), str(ydsnet), str(ydstogo), str(yrdline100)
            ])
            request_body = (
                '{"fields":["down","qtr","month_day","PlayType_lag_index","TimeSecs","ydsnet","ydstogo","yrdline100"],"records":[['
                + record + ']]}')

            curl_input = [
                'curl', '-i', '-k', '-X', 'POST',
                'https://172.26.228.121/v2/scoring/online/32b1d108-369d-42ce-966b-48d0a20a6b38',
                '-d', request_body,
                '-H', 'content-type:application/json', '-H',
                str('authorization: Bearer ' + str(accessToken))
            ]

            result = subprocess.Popen(curl_input,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
            out, err = result.communicate()

            # "-i" prints the headers first; the JSON body is the last
            # CRLF-separated chunk.
            prediction_results = json.loads(
                out.split('\r\n')[-1])['records'][0]
            predictions_yards_gained = str(prediction_results[-1])

            payload = ','.join([
                str(value) for i, value in enumerate(prediction_results)
                if i != 8
            ])
            writeCallback.content = payload
            splitFlowFile = session.write(splitFlowFile, writeCallback)
            splitFlowFile = session.putAllAttributes(
                splitFlowFile,
                {'predictions_yards_gained': predictions_yards_gained})

        except:
            # Still best-effort, but no longer silent. The bare except is kept
            # because, presumably under Jython, it also traps Java exceptions.
            import sys
            import traceback
            traceback.print_exc(file=sys.stdout)

        session.transfer(splitFlowFile, REL_SUCCESS)
Esempio n. 20
0
 def process(self, inputStream):
     """Emit a flowfile carrying the filtered customer numbers, if any.

     The incoming text and the text from ``getNewmartCustListFromState()`` are
     both converted with ``convertTextToList`` and passed to
     ``getFilteredCustNoList``. A truthy result is cleaned with ``cleanText``
     and attached as the ``cust_no`` attribute of a new child flowfile sent to
     ``REL_SUCCESS``. Nothing is emitted otherwise.
     """
     incoming_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
     incoming_numbers = convertTextToList(incoming_text)
     state_numbers = convertTextToList(getNewmartCustListFromState())
     result = getFilteredCustNoList(incoming_numbers, state_numbers)
     if not result:
         return
     cleaned = cleanText(str(result))
     newFlowFile = session.create(self.parentFlowFile)
     newFlowFile = session.putAttribute(newFlowFile, 'cust_no', cleaned)
     session.transfer(newFlowFile, REL_SUCCESS)
    def process(self, inputStream, outputStream):
        """Turn a JSON object into one ``name,value,message,timestamp`` line.

        Errors are printed to stdout and re-raised.
        """
        try:
            record = json.loads(
                IOUtils.toString(inputStream, StandardCharsets.UTF_8))
            columns = [
                record[field]
                for field in ('name', 'value', 'message', 'timestamp')
            ]
            output_text = "{},{},{},{}".format(*columns)

            outputStream.write(bytearray(output_text.encode('utf-8')))
        except:
            traceback.print_exc(file=sys.stdout)
            raise
Esempio n. 22
0
    def process(self, inputStream, outputStream):
        """Replace the content with the ``name`` field of the JSON input.

        Errors are printed to stdout and re-raised.
        """
        try:
            record = json.loads(
                IOUtils.toString(inputStream, StandardCharsets.UTF_8))
            encoded_name = record['name'].encode('utf-8')
            outputStream.write(bytearray(encoded_name))
        except:
            traceback.print_exc(file=sys.stdout)
            raise
Esempio n. 23
0
 def process(self, inputStream, outputStream):
     """Emit a fixed envelope carrying the input's ``rating.metric.value``.

     The result is written as UTF-8 JSON indented by four spaces.
     """
     source = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
     metric_value = source['rating']['metric']['value']
     envelope = {
         "Source": "NiFi",
         "ID": "python",
         "Name": "test",
         "meta_data": metric_value
     }
     rendered = json.dumps(envelope, indent=4)
     outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 24
0
 def process(self, inputStream, outputStream):
     """Re-serialize the JSON content with sorted keys.

     The output uses an indent of three, ``(',', ':')`` separators and
     ``skipkeys=True``.
     """
     document = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
     rendered = json.dumps(document,
                           skipkeys=True,
                           separators=(',', ':'),
                           indent=3,
                           sort_keys=True)
     outputStream.write(bytearray(rendered))
Esempio n. 25
0
    def process(self, inputStream, outputStream):
        """Expand one measurement row into sixty per-minute records.

        For minute ``n`` in 0..59 a record is built from the input's ``id`` and
        ``meas_type``, ``create_time`` shifted by ``n`` minutes (plus its day
        and time-of-day renderings), a ``vtype`` label and ``val`` taken from
        field ``v00``..``v59`` (``None`` becomes 0). The list is written as
        JSON. Errors are printed to stdout and re-raised.
        """
        try:
            input_obj = json.loads(
                IOUtils.toString(inputStream, StandardCharsets.UTF_8))

            stamp_format = "%Y-%m-%d %H:%M:%S.%f"
            # Labels for the first three minutes; all later ones share one.
            leading_vtypes = {0: '时间', 1: '最大值', 2: '最小值'}

            output_arr = []
            for minute in range(60):
                start = datetime.datetime.strptime(
                    input_obj['create_time'], '%Y-%m-%d %H:%M:%S.%f')
                moment = start + datetime.timedelta(minutes=minute)
                # Midnight of the shifted moment's calendar day.
                day_start = datetime.datetime.strptime(
                    moment.strftime("%Y-%m-%d"), '%Y-%m-%d')
                output_obj = {
                    "id": input_obj['id'],
                    "meas_type": input_obj['meas_type'],
                    "create_time": moment.strftime(stamp_format),
                    "create_day": day_start.strftime(stamp_format),
                    "create_minute": moment.strftime("%H:%M:%S.%f")
                }
                output_obj['vtype'] = leading_vtypes.get(minute, '平均值')

                # Value fields are named v00 .. v59.
                reading = input_obj['v%02d' % minute]
                output_obj['val'] = 0 if reading is None else reading

                output_arr.append(output_obj)

            output_text = json.dumps(output_arr)
            outputStream.write(StringUtil.toBytes(output_text))

        except:
            traceback.print_exc(file=sys.stdout)
            raise
 def process(self, inputStream, outputStream):
     """Mark the first record as coming from an authorized source or not.

     Sets ``issourcevalid`` on element 0 of the JSON array to ``VALID_SOURCE``
     when its ``source`` is in the hard-coded list, otherwise to
     ``INVALID_SOURCE``. The array is written back as UTF-8 JSON with an
     indent of three.
     """
     records = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
     # Hard-coded list of authorized sources; a real deployment would take
     # this from the sensor details.
     authorized_source = ('SENSOR_TEMP00X912', 'SENSOR_HUM00H212', 'SENSOR_CAM00S212')
     is_authorized = records[0]["source"] in authorized_source
     records[0]["issourcevalid"] = "VALID_SOURCE" if is_authorized else "INVALID_SOURCE"
     rendered = json.dumps(records, indent=3)
     outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 27
0
    def process(self, inputStream, outputStream):
        """Replace the content with the ``values`` field of the JSON input.

        Errors are printed to stdout and re-raised.
        """
        try:
            # Read input FlowFile content
            input_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
            input_obj = json.loads(input_text)

            # Write output content. The call previously read `StringUtil.(`,
            # which does not parse.
            # NOTE(review): assumes `values` is a string, as toBytes is used
            # on strings elsewhere -- confirm.
            outputStream.write(StringUtil.toBytes(input_obj['values']))
        except:
            traceback.print_exc(file=sys.stdout)
            raise
 def process(self, inputStream, outputStream):
     """Add a ``geo_location`` field taken from the geocoder result.

     ``self.flowfile`` is parsed as JSON. When it has at least one
     interpretation, the ``lat``/``lng`` of the first one's geometry center are
     stored on the content document as ``"<lat>,<lng>"``. The document is
     written back as UTF-8 JSON indented by four spaces.
     """
     elasticjson = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
     geojson = json.loads(self.flowfile)
     interpretations = geojson['interpretations']
     if len(interpretations) > 0:
         center = interpretations[0]['feature']['geometry']['center']
         latitude = str(center['lat'])
         longitude = str(center['lng'])
         elasticjson['geo_location'] = latitude + ',' + longitude
     rendered = json.dumps(elasticjson, indent=4)
     outputStream.write(bytearray(rendered.encode('utf-8')))
    def process(self, inputStream, outputStream):
        """Reshape the ``rating`` object into primary and secondary ratings.

        ``rating.primary.value`` becomes ``Rating``. Every other entry of
        ``rating`` becomes a ``SecondaryRatings`` item with its key as ``Id``,
        a fixed ``Range`` of 5 and its ``value``. The result is written as
        UTF-8 JSON indented by four spaces.
        """
        source = json.loads(
            IOUtils.toString(inputStream, StandardCharsets.UTF_8))
        reshaped = {
            "Range": 5,
            "Rating": source['rating']['primary']['value'],
            "SecondaryRatings": {}
        }
        secondary = reshaped['SecondaryRatings']
        for name, rating in source['rating'].iteritems():
            if name == "primary":
                continue
            secondary[name] = {"Id": name, "Range": 5, "Value": rating['value']}

        rendered = json.dumps(reshaped, indent=4)
        outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 30
0
  def process(self, inputStream, outputStream):
    """Convert the ``ts_*`` fields of the JSON content with ``time_to_date``.

    ``ts_lastlog``, ``ts_lastseen`` and ``ts_firstseen`` are each replaced in
    place when present. The object is written back as UTF-8 JSON.
    """
    record = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))

    for field in ('ts_lastlog', 'ts_lastseen', 'ts_firstseen'):
        if field in record:
            record[field] = time_to_date(record[field])

    outputStream.write(bytearray(json.dumps(record).encode('utf-8')))
Esempio n. 31
0
        def process(self, inputStream, outputStream):  # noqa
            """
            Wrap the flowfile content in a JSON array, transform it with
            ``transform_package_responses`` and write the result back as
            UTF-8 JSON indented by four spaces.
            """
            raw = IOUtils.toString(inputStream, StandardCharsets.UTF_8)  # noqa
            # The content is placed inside "[...]" before parsing.
            responses = json.loads("[{}]".format(raw))
            transformed = transform_package_responses(
                responses, submission_id, long_field_names_map)

            rendered = json.dumps(transformed, indent=4)
            outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 32
0
    def process(self, inputStream, outputStream):
        """Render the incoming CSV through ``tablegen`` (presumably as an HTML table).

        The first CSV row is written as the header row (``tablegen.row`` with
        its second argument ``True``), every following row as a data row. The
        output is framed by ``tablegen.start`` and ``tablegen.end``. An empty
        input produces just that frame instead of raising ``StopIteration``.

        :param inputStream: stream holding the UTF-8 encoded CSV text.
        :param outputStream: stream that receives the rendered table.
        """
        text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)

        # Fixed rendering options.
        delim = ','
        nstart = 0            # number of leading rows to skip
        skipheader = False
        renum = False         # overwrite column 0 with a running row number
        title = ''
        completedoc = False
        attrs = {'table': 'border=1'}
        csv_reader = csv.reader(StringIO(text), dialect='excel', delimiter=delim)
        nrow = 0  # The row number counter.

        outputStream.write(tablegen.start(completedoc, title, attrs))

        if not skipheader:
            # The next() builtin works on Python 2 and 3 alike; the default
            # avoids StopIteration on empty input.
            header = next(csv_reader, None)
            if header is not None:
                outputStream.write(tablegen.row(header, True, attrs))
                nrow += 1
        while nrow < nstart:
            if next(csv_reader, None) is None:
                break
            nrow += 1
        for row in csv_reader:
            if renum:
                # If there is no zeroth header row, add 1 to the new row number
                # to correct for the rows being counted from zero. Do the same if
                # we're counting from nstart.
                row[0] = str(nrow - nstart + int(skipheader or nstart > 0))
            outputStream.write(tablegen.row(row, False, attrs))
            nrow += 1
        outputStream.write(tablegen.end(completedoc))
 def process(self, inputStream, outputStream):
   """Validate the first record's data against its MD5 ``hashkey``.

   ``isdatavalid`` on element 0 becomes ``VALID_DATA`` only when
   ``issourcevalid`` is ``VALID_SOURCE`` and ``hashkey`` equals the upper-case
   hex MD5 of ``data``. Otherwise it becomes ``INVALID_DATA``. The array is
   written back as UTF-8 JSON with an indent of three.
   """
   records = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
   verdict = "INVALID_DATA"
   if records[0]["issourcevalid"] == "VALID_SOURCE":
       datatohash = records[0]["data"]
       if records[0]["hashkey"] == hashlib.md5(datatohash.encode()).hexdigest().upper():
           verdict = "VALID_DATA"
   records[0]["isdatavalid"] = verdict
   rendered = json.dumps(records, indent=3)
   outputStream.write(bytearray(rendered.encode('utf-8')))
Esempio n. 34
0
  def get_from_hdfs(self, file_loc):
    """
    Try to get the text content from HDFS based on file_loc
    Return schema literal string, or None when the path does not exist or
    anything goes wrong while reading it.
    """

    fp = HdfsPath(file_loc)
    try:
      if not self.fs.exists(fp):
        return None
      in_stream = self.fs.open(fp)
      try:
        return IOUtils.toString(in_stream, 'UTF-8')
      finally:
        # The stream was previously left open.
        in_stream.close()
    except:
      # Bare except kept on purpose: presumably under Jython it also traps
      # Java exceptions raised by the file system -- confirm before narrowing.
      return None
Esempio n. 35
0
 def get_from_hdfs(self, file_loc):
   """
   Try to get the text content from HDFS based on file_loc
   Return schema literal string, or None when the path does not exist or an
   Exception is raised (the error is logged).
   """
   fp = HdfsPath(file_loc)
   try:
     if not self.fs.exists(fp):
       self.logger.info('Schema not exists: {}'.format(file_loc))
       return None
     in_stream = self.fs.open(fp)
     try:
       self.logger.info('GET schema literal from {}'.format(file_loc))
       return IOUtils.toString(in_stream, 'UTF-8')
     finally:
       # The stream was previously left open.
       in_stream.close()
   except Exception as e:
     self.logger.error(str(e))
     return None
Esempio n. 36
0
  def process(self, instream, outstream):
    """Replace the content with a JSON object of word -> occurrence count.

    Words are the whitespace-separated tokens of the UTF-8 content.
    """
    # To read content as a byte array instead:
    # data = IOUtils.toByteArray(instream)
    data = IOUtils.toString(instream, StandardCharsets.UTF_8)

    # Do wordcount
    words = {}
    for token in data.strip().split():
      words[token] = words.get(token, 0) + 1

    # Write modified content
    counts_json = json.dumps(words)
    outstream.write(bytearray(counts_json))
Esempio n. 37
0
    def process(self, inputStream, outputStream):
        """Square the ``value`` field and set ``message`` to ``Hello World``.

        The modified object is written back as JSON. Errors are printed to
        stdout and re-raised.
        """
        try:
            record = json.loads(
                IOUtils.toString(inputStream, StandardCharsets.UTF_8))

            # Transform content
            amount = record['value']
            record['value'] = amount * amount
            record['message'] = 'Hello World'

            # Write output content
            outputStream.write(StringUtil.toBytes(json.dumps(record)))
        except:
            traceback.print_exc(file=sys.stdout)
            raise
Esempio n. 38
0
 def __wget(self, url):
     """Execute a GET request for ``url`` and return the response body as UTF-8 text."""
     http_client = BasicHttpClient(url)
     request = GetMethod(url)
     http_client.executeMethod(request)
     body_stream = request.getResponseBodyAsStream()
     return IOUtils.toString(body_stream, "UTF-8")
Esempio n. 39
0
 def __getPayloadAsString(self, payload):
     """Return the payload's content decoded as UTF-8 text.

     The payload is closed afterwards, including when reading fails.
     """
     try:
         return IOUtils.toString(payload.open(), "UTF-8")
     finally:
         payload.close()
Esempio n. 40
0
 def process(self,inputStream,outputStream):
     """Write the UTF-8 content to ``f``, a name defined outside this block."""
     contents = IOUtils.toString(inputStream,StandardCharsets.UTF_8)
     f.write(contents)
 def process(self, inputStream, outputStream):
     """Replace the content with ``Hello World!`` spelled backwards.

     The incoming content is read but its value is not used.
     """
     text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
     greeting = 'Hello World!'
     reversed_greeting = greeting[::-1]
     outputStream.write(bytearray(reversed_greeting.encode('utf-8')))